strip DMA and RoCC CSRs out of rocket and uncore (#201)
This commit is contained in:
		
				
					committed by
					
						 Andrew Waterman
						Andrew Waterman
					
				
			
			
				
	
			
			
			
						parent
						
							47a0c880a4
						
					
				
				
					commit
					38e0967816
				
			| @@ -101,7 +101,6 @@ class BaseCoreplexConfig extends Config ( | ||||
|       case BuildRoCC => Nil | ||||
|       case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _) | ||||
|       case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _) | ||||
|       case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _) | ||||
|       //Rocket Core Constants | ||||
|       case FetchWidth => if (site(UseCompressed)) 2 else 1 | ||||
|       case RetireWidth => 1 | ||||
|   | ||||
| @@ -49,7 +49,6 @@ class WithGroundTest extends Config( | ||||
|             case GroundTestId => i | ||||
|             case NCachedTileLinkPorts => if(tileSettings.cached > 0) 1 else 0 | ||||
|             case NUncachedTileLinkPorts => tileSettings.uncached | ||||
|             case RoccNCSRs => tileSettings.csrs | ||||
|           }))) | ||||
|         } | ||||
|       } | ||||
|   | ||||
| @@ -11,8 +11,7 @@ import cde.{Parameters, Field} | ||||
| case object BuildGroundTest extends Field[Parameters => GroundTest] | ||||
|  | ||||
| case class GroundTestTileSettings( | ||||
|   uncached: Int = 0, cached: Int = 0, ptw: Int = 0, | ||||
|   maxXacts: Int = 1, csrs: Int = 0) | ||||
|   uncached: Int = 0, cached: Int = 0, ptw: Int = 0, maxXacts: Int = 1) | ||||
| case object GroundTestKey extends Field[Seq[GroundTestTileSettings]] | ||||
| case object GroundTestId extends Field[Int] | ||||
|  | ||||
|   | ||||
| @@ -332,11 +332,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) | ||||
|     read_mapping += addr -> io.custom_mrw_csrs(i) | ||||
|   } | ||||
|  | ||||
|   for ((addr, i) <- roccCsrs.zipWithIndex) { | ||||
|     require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use") | ||||
|     read_mapping += addr -> io.rocc.csr.rdata(i) | ||||
|   } | ||||
|  | ||||
|   val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) } | ||||
|  | ||||
|   val addr_valid = decoded_addr.values.reduce(_||_) | ||||
| @@ -559,10 +554,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) | ||||
|   reg_dcsr.debugint := io.prci.interrupts.debug | ||||
|   reg_dcsr.hwbpcount := UInt(p(NBreakpoints)) | ||||
|  | ||||
|   io.rocc.csr.waddr := io.rw.addr | ||||
|   io.rocc.csr.wdata := wdata | ||||
|   io.rocc.csr.wen := wen | ||||
|  | ||||
|   if (!usingUser) { | ||||
|     reg_mstatus.mpp := PRV.M | ||||
|     reg_mstatus.prv := PRV.M | ||||
|   | ||||
| @@ -1,400 +0,0 @@ | ||||
| package rocket | ||||
|  | ||||
| import Chisel._ | ||||
| import uncore.tilelink._ | ||||
| import uncore.devices._ | ||||
| import uncore.devices.DmaRequest._ | ||||
| import uncore.agents._ | ||||
| import uncore.util._ | ||||
| import junctions.{ParameterizedBundle, AddrMap} | ||||
| import cde.Parameters | ||||
|  | ||||
| trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { | ||||
|   val dmaAddrBits = coreMaxAddrBits | ||||
|   val dmaSegmentSizeBits = coreMaxAddrBits | ||||
|   val dmaSegmentBits = 24 | ||||
| } | ||||
|  | ||||
| abstract class ClientDmaBundle(implicit val p: Parameters) | ||||
|   extends ParameterizedBundle()(p) with HasClientDmaParameters | ||||
| abstract class ClientDmaModule(implicit val p: Parameters) | ||||
|   extends Module with HasClientDmaParameters | ||||
|  | ||||
| class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) { | ||||
|   val cmd = UInt(width = DMA_CMD_SZ) | ||||
|   val src_start  = UInt(width = dmaAddrBits) | ||||
|   val dst_start  = UInt(width = dmaAddrBits) | ||||
|   val src_stride = UInt(width = dmaSegmentSizeBits) | ||||
|   val dst_stride = UInt(width = dmaSegmentSizeBits) | ||||
|   val segment_size = UInt(width = dmaSegmentSizeBits) | ||||
|   val nsegments  = UInt(width = dmaSegmentBits) | ||||
|   val word_size  = UInt(width = dmaWordSizeBits) | ||||
| } | ||||
|  | ||||
| object ClientDmaRequest { | ||||
|   def apply(cmd: UInt, | ||||
|             src_start: UInt, | ||||
|             dst_start: UInt, | ||||
|             segment_size: UInt, | ||||
|             nsegments: UInt = UInt(1), | ||||
|             src_stride: UInt = UInt(0), | ||||
|             dst_stride: UInt = UInt(0), | ||||
|             word_size: UInt = UInt(0)) | ||||
|       (implicit p: Parameters) = { | ||||
|     val req = Wire(new ClientDmaRequest) | ||||
|     req.cmd := cmd | ||||
|     req.src_start := src_start | ||||
|     req.dst_start := dst_start | ||||
|     req.src_stride := src_stride | ||||
|     req.dst_stride := dst_stride | ||||
|     req.segment_size := segment_size | ||||
|     req.nsegments := nsegments | ||||
|     req.word_size := word_size | ||||
|     req | ||||
|   } | ||||
| } | ||||
|  | ||||
| object ClientDmaResponse { | ||||
|   val pagefault = UInt("b01") | ||||
|   val invalid_region = UInt("b10") | ||||
|  | ||||
|   def apply(status: UInt = UInt(0))(implicit p: Parameters) = { | ||||
|     val resp = Wire(new ClientDmaResponse) | ||||
|     resp.status := status | ||||
|     resp | ||||
|   } | ||||
| } | ||||
|  | ||||
| class ClientDmaResponse(implicit p: Parameters) extends ClientDmaBundle { | ||||
|   val status = UInt(width = dmaStatusBits) | ||||
| } | ||||
|  | ||||
| class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) { | ||||
|   val req = Decoupled(new ClientDmaRequest) | ||||
|   val resp = Valid(new ClientDmaResponse).flip | ||||
| } | ||||
|  | ||||
| class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) | ||||
|     with HasClientDmaParameters with HasTileLinkParameters { | ||||
|   val io = new Bundle { | ||||
|     val cpu = (new ClientDmaIO).flip | ||||
|     val mem = new ClientUncachedTileLinkIO | ||||
|     val ptw = new TLBPTWIO | ||||
|     val busy = Bool(OUTPUT) | ||||
|     val incr_outstanding = Bool(OUTPUT) | ||||
|     val host_id = UInt(INPUT, log2Up(nCores)) | ||||
|   } | ||||
|  | ||||
|   val tlb = Module(new DecoupledTLB()(p.alterPartial({ | ||||
|     case CacheName => "L1D" | ||||
|   }))) | ||||
|   io.ptw <> tlb.io.ptw | ||||
|  | ||||
|   private val pgSize = 1 << pgIdxBits | ||||
|  | ||||
|   val cmd = Reg(UInt(width = DMA_CMD_SZ)) | ||||
|   val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq( | ||||
|     DMA_CMD_PFR -> UInt("b10"), | ||||
|     DMA_CMD_PFW -> UInt("b10"), | ||||
|     DMA_CMD_SIN -> UInt("b10"), | ||||
|     DMA_CMD_SOUT -> UInt("b01"))) | ||||
|  | ||||
|   val segment_size = Reg(UInt(width = dmaSegmentSizeBits)) | ||||
|   val bytes_left = Reg(UInt(width = dmaSegmentSizeBits)) | ||||
|   val segments_left = Reg(UInt(width = dmaSegmentBits)) | ||||
|   val word_size = Reg(UInt(width = dmaWordSizeBits)) | ||||
|  | ||||
|   val src_vaddr = Reg(UInt(width = dmaAddrBits)) | ||||
|   val dst_vaddr = Reg(UInt(width = dmaAddrBits)) | ||||
|   val src_vpn = src_vaddr(dmaAddrBits - 1, pgIdxBits) | ||||
|   val dst_vpn = dst_vaddr(dmaAddrBits - 1, pgIdxBits) | ||||
|   val src_idx = src_vaddr(pgIdxBits - 1, 0) | ||||
|   val dst_idx = dst_vaddr(pgIdxBits - 1, 0) | ||||
|   val src_pglen = UInt(pgSize) - src_idx | ||||
|   val dst_pglen = UInt(pgSize) - dst_idx | ||||
|  | ||||
|   val src_stride = Reg(UInt(width = dmaSegmentSizeBits)) | ||||
|   val dst_stride = Reg(UInt(width = dmaSegmentSizeBits)) | ||||
|  | ||||
|   val src_ppn = Reg(UInt(width = ppnBits)) | ||||
|   val dst_ppn = Reg(UInt(width = ppnBits)) | ||||
|  | ||||
|   val src_paddr = Cat(src_ppn, src_idx) | ||||
|   val dst_paddr = Cat(dst_ppn, dst_idx) | ||||
|  | ||||
|   val last_src_vpn = Reg(UInt(width = vpnBits)) | ||||
|   val last_dst_vpn = Reg(UInt(width = vpnBits)) | ||||
|  | ||||
|   val tx_len = src_pglen min dst_pglen min bytes_left | ||||
|  | ||||
|   val dma_busy = Reg(init = UInt(0, tlMaxClientXacts)) | ||||
|   val dma_xact_id = PriorityEncoder(~dma_busy) | ||||
|   val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats) | ||||
|  | ||||
|   val (s_idle :: s_translate :: s_dma_req :: s_dma_update :: | ||||
|        s_prepare :: s_finish :: Nil) = Enum(Bits(), 6) | ||||
|   val state = Reg(init = s_idle) | ||||
|  | ||||
|   // Two-bit masks: bit 0 tracks the src address, bit 1 tracks the dst address | ||||
|   val to_translate = Reg(init = UInt(0, 2)) | ||||
|   val tlb_sent = Reg(init = UInt(0, 2)) | ||||
|   val tlb_to_send = to_translate & ~tlb_sent | ||||
|   val resp_status = Reg(UInt(width = dmaStatusBits)) | ||||
|  | ||||
|   def make_acquire( | ||||
|       addr_beat: UInt, client_xact_id: UInt, client_id: UInt, | ||||
|       cmd: UInt, source: UInt, dest: UInt, | ||||
|       length: UInt, size: UInt): Acquire = { | ||||
|  | ||||
|     val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits)) | ||||
|     data_blob := DmaRequest( | ||||
|       xact_id = UInt(0), | ||||
|       client_id = client_id, | ||||
|       cmd = cmd, | ||||
|       source = source, | ||||
|       dest = dest, | ||||
|       length = length, | ||||
|       size = size).asUInt | ||||
|     val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob) | ||||
|     val base_addr = addrMap("devices:dma").start | ||||
|     val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits)) | ||||
|  | ||||
|     PutBlock( | ||||
|       client_xact_id = client_xact_id, | ||||
|       addr_block = addr_block, | ||||
|       addr_beat = addr_beat, | ||||
|       data = data_beats(addr_beat), | ||||
|       alloc = Bool(false)) | ||||
|   } | ||||
|  | ||||
|   def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = { | ||||
|     val src_cacheable = addrMap.isCacheable(src) | ||||
|     val dst_cacheable = addrMap.isCacheable(dst) | ||||
|     val dst_ok = Mux(cmd === DMA_CMD_SOUT, !dst_cacheable, dst_cacheable) | ||||
|     val src_ok = Mux(cmd === DMA_CMD_SIN,  !src_cacheable, Bool(true)) | ||||
|     dst_ok && src_ok | ||||
|   } | ||||
|  | ||||
|   tlb.io.req.valid := tlb_to_send.orR | ||||
|   tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn) | ||||
|   tlb.io.req.bits.passthrough := Bool(false) | ||||
|   tlb.io.req.bits.instruction := Bool(false) | ||||
|   tlb.io.req.bits.store := !tlb_to_send(0) | ||||
|   tlb.io.resp.ready := tlb_sent.orR | ||||
|  | ||||
|   when (tlb.io.req.fire()) { | ||||
|     tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send) | ||||
|   } | ||||
|  | ||||
|   when (tlb.io.resp.fire()) { | ||||
|     val recv_choice = PriorityEncoderOH(to_translate) | ||||
|     val error = Mux(recv_choice(0), | ||||
|       tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st) | ||||
|  | ||||
|     when (error) { | ||||
|       resp_status := ClientDmaResponse.pagefault | ||||
|       state := s_finish | ||||
|     } | ||||
|  | ||||
|     // latch the translated PPN: src when bit 0 was pending, otherwise dst | ||||
|     when (recv_choice(0)) { | ||||
|       src_ppn := tlb.io.resp.bits.ppn | ||||
|     } .otherwise { | ||||
|       dst_ppn := tlb.io.resp.bits.ppn | ||||
|     } | ||||
|  | ||||
|     to_translate := to_translate & ~recv_choice | ||||
|   } | ||||
|  | ||||
|   io.cpu.req.ready := state === s_idle | ||||
|   io.cpu.resp.valid := state === s_finish | ||||
|   io.cpu.resp.bits := ClientDmaResponse(resp_status) | ||||
|  | ||||
|   io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR | ||||
|   io.mem.acquire.bits := make_acquire( | ||||
|     addr_beat = dma_req_beat, | ||||
|     client_id = io.host_id, | ||||
|     client_xact_id = dma_xact_id, | ||||
|     cmd = cmd, source = src_paddr, dest = dst_paddr, | ||||
|     length = tx_len, size = word_size) | ||||
|  | ||||
|   io.mem.grant.ready := (state =/= s_dma_req) | ||||
|  | ||||
|   when (io.cpu.req.fire()) { | ||||
|     val req = io.cpu.req.bits | ||||
|     val is_prefetch = req.cmd(2, 1) === UInt("b01") | ||||
|     cmd := req.cmd | ||||
|     src_vaddr := req.src_start | ||||
|     dst_vaddr := req.dst_start | ||||
|     src_stride := req.src_stride | ||||
|     dst_stride := req.dst_stride | ||||
|     segment_size := req.segment_size | ||||
|     segments_left := req.nsegments - UInt(1) | ||||
|     bytes_left := req.segment_size | ||||
|     word_size := req.word_size | ||||
|     to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11")) | ||||
|     tlb_sent := UInt(0) | ||||
|     state := s_translate | ||||
|   } | ||||
|  | ||||
|   when (state === s_translate && !to_translate.orR) { | ||||
|     when (check_region(cmd, src_paddr, dst_paddr)) { | ||||
|       state := s_dma_req | ||||
|     } .otherwise { | ||||
|       resp_status := ClientDmaResponse.invalid_region | ||||
|       state := s_finish | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   def setBusy(set: Bool, xact_id: UInt): UInt = | ||||
|     Mux(set, UIntToOH(xact_id), UInt(0)) | ||||
|  | ||||
|   dma_busy := (dma_busy | | ||||
|                 setBusy(dma_req_done, dma_xact_id)) & | ||||
|                 ~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id) | ||||
|  | ||||
|  | ||||
|   when (dma_req_done) { | ||||
|     src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0)) | ||||
|     dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0)) | ||||
|     bytes_left := bytes_left - tx_len | ||||
|     state := s_dma_update | ||||
|   } | ||||
|  | ||||
|   when (state === s_dma_update) { | ||||
|     when (bytes_left === UInt(0)) { | ||||
|       when (segments_left === UInt(0)) { | ||||
|         resp_status := UInt(0) | ||||
|         state := s_finish | ||||
|       } .otherwise { | ||||
|         last_src_vpn := src_vpn | ||||
|         last_dst_vpn := dst_vpn | ||||
|         src_vaddr := src_vaddr + src_stride | ||||
|         dst_vaddr := dst_vaddr + dst_stride | ||||
|         bytes_left := segment_size | ||||
|         segments_left := segments_left - UInt(1) | ||||
|         state := s_prepare | ||||
|       } | ||||
|     } .otherwise { | ||||
|       to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0)) | ||||
|       tlb_sent := UInt(0) | ||||
|       state := s_translate | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   when (state === s_prepare) { | ||||
|     to_translate := adv_ptr & Cat( | ||||
|       dst_vpn =/= last_dst_vpn, | ||||
|       src_vpn =/= last_src_vpn) | ||||
|     tlb_sent := UInt(0) | ||||
|     state := s_translate | ||||
|   } | ||||
|  | ||||
|   when (state === s_finish) { state := s_idle } | ||||
|  | ||||
|   io.busy := (state =/= s_idle) || dma_busy.orR | ||||
|   io.incr_outstanding := dma_req_done | ||||
| } | ||||
|  | ||||
| object DmaCtrlRegNumbers { | ||||
|   val SRC_STRIDE = 0 | ||||
|   val DST_STRIDE = 1 | ||||
|   val SEGMENT_SIZE = 2 | ||||
|   val NSEGMENTS = 3 | ||||
|   val WORD_SIZE = 4 | ||||
|   val RESP_STATUS = 5 | ||||
|   val OUTSTANDING = 6 | ||||
|   val NCSRS = 7 | ||||
|   val CSR_BASE = 0x800 | ||||
|   val CSR_END  = CSR_BASE + NCSRS | ||||
| } | ||||
| import DmaCtrlRegNumbers._ | ||||
|  | ||||
| class DmaCtrlRegFile(implicit val p: Parameters) extends Module | ||||
|     with HasClientDmaParameters with HasTileLinkParameters { | ||||
|  | ||||
|   private val nWriteRegs = 5 | ||||
|   private val nRegs = nWriteRegs + 2 | ||||
|  | ||||
|   val io = new Bundle { | ||||
|     val wen = Bool(INPUT) | ||||
|     val waddr = UInt(INPUT, log2Up(nRegs)) | ||||
|     val wdata = UInt(INPUT, dmaSegmentSizeBits) | ||||
|  | ||||
|     val src_stride = UInt(OUTPUT, dmaSegmentSizeBits) | ||||
|     val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits) | ||||
|     val segment_size = UInt(OUTPUT, dmaSegmentSizeBits) | ||||
|     val nsegments  = UInt(OUTPUT, dmaSegmentBits) | ||||
|     val word_size = UInt(OUTPUT, dmaWordSizeBits) | ||||
|  | ||||
|     val incr_outstanding = Bool(INPUT) | ||||
|     val xact_outstanding = Bool(OUTPUT) | ||||
|   } | ||||
|  | ||||
|   val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits))) | ||||
|   val waddr = io.waddr(log2Up(NCSRS) - 1, 0) | ||||
|  | ||||
|   io.src_stride := regs(SRC_STRIDE) | ||||
|   io.dst_stride := regs(DST_STRIDE) | ||||
|   io.segment_size := regs(SEGMENT_SIZE) | ||||
|   io.nsegments := regs(NSEGMENTS) | ||||
|   io.word_size := regs(WORD_SIZE) | ||||
|  | ||||
|   when (io.wen && waddr < UInt(nWriteRegs)) { | ||||
|     regs(waddr) := io.wdata | ||||
|   } | ||||
|  | ||||
|   val outstanding_cnt = TwoWayCounter( | ||||
|     io.incr_outstanding, | ||||
|     io.wen && io.waddr === UInt(OUTSTANDING), | ||||
|     tlMaxClientXacts) | ||||
|  | ||||
|   io.xact_outstanding := outstanding_cnt > UInt(0) | ||||
| } | ||||
|  | ||||
| class DmaController(implicit p: Parameters) extends RoCC()(p) | ||||
|     with HasClientDmaParameters { | ||||
|   io.mem.req.valid := Bool(false) | ||||
|   io.resp.valid := Bool(false) | ||||
|   io.interrupt := Bool(false) | ||||
|  | ||||
|   val cmd = Queue(io.cmd) | ||||
|   val inst = cmd.bits.inst | ||||
|   val is_transfer = inst.funct < UInt(8) | ||||
|  | ||||
|   val reg_status = Reg(UInt(width = dmaStatusBits)) | ||||
|   val crfile = Module(new DmaCtrlRegFile) | ||||
|   crfile.io.waddr := io.csr.waddr | ||||
|   crfile.io.wdata := io.csr.wdata | ||||
|   crfile.io.wen := io.csr.wen | ||||
|  | ||||
|   io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride | ||||
|   io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride | ||||
|   io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size | ||||
|   io.csr.rdata(NSEGMENTS) := crfile.io.nsegments | ||||
|   io.csr.rdata(WORD_SIZE) := crfile.io.word_size | ||||
|   io.csr.rdata(RESP_STATUS) := reg_status | ||||
|  | ||||
|   val frontend = Module(new DmaFrontend) | ||||
|   io.ptw(0) <> frontend.io.ptw | ||||
|   io.autl <> frontend.io.mem | ||||
|   crfile.io.incr_outstanding := frontend.io.incr_outstanding | ||||
|   frontend.io.host_id := io.host_id | ||||
|   frontend.io.cpu.req.valid := cmd.valid && is_transfer | ||||
|   frontend.io.cpu.req.bits := ClientDmaRequest( | ||||
|     cmd = cmd.bits.inst.funct, | ||||
|     src_start = cmd.bits.rs2, | ||||
|     dst_start = cmd.bits.rs1, | ||||
|     src_stride = crfile.io.src_stride, | ||||
|     dst_stride = crfile.io.dst_stride, | ||||
|     segment_size = crfile.io.segment_size, | ||||
|     nsegments = crfile.io.nsegments, | ||||
|     word_size = crfile.io.word_size) | ||||
|   cmd.ready := is_transfer && frontend.io.cpu.req.ready | ||||
|  | ||||
|   when (frontend.io.cpu.resp.valid) { | ||||
|     reg_status := frontend.io.cpu.resp.bits.status | ||||
|   } | ||||
|  | ||||
|   io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding | ||||
| } | ||||
| @@ -12,14 +12,6 @@ import cde.{Parameters, Field} | ||||
| case object RoccMaxTaggedMemXacts extends Field[Int] | ||||
| case object RoccNMemChannels extends Field[Int] | ||||
| case object RoccNPTWPorts extends Field[Int] | ||||
| case object RoccNCSRs extends Field[Int] | ||||
|  | ||||
| class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) { | ||||
|   val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen)) | ||||
|   val waddr = UInt(OUTPUT, CSR.ADDRSZ) | ||||
|   val wdata = UInt(OUTPUT, xLen) | ||||
|   val wen = Bool(OUTPUT) | ||||
| } | ||||
|  | ||||
| class RoCCInstruction extends Bundle | ||||
| { | ||||
| @@ -59,8 +51,6 @@ class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { | ||||
|   val fpu_req = Decoupled(new FPInput) | ||||
|   val fpu_resp = Decoupled(new FPResult).flip | ||||
|   val exception = Bool(INPUT) | ||||
|   val csr = (new RoCCCSRs).flip | ||||
|   val host_id = UInt(INPUT, log2Up(nCores)) | ||||
|  | ||||
|   override def cloneType = new RoCCInterface().asInstanceOf[this.type] | ||||
| } | ||||
|   | ||||
| @@ -63,9 +63,6 @@ trait HasCoreParameters extends HasAddrMapParameters { | ||||
|   val vaddrBitsExtended = vpnBitsExtended + pgIdxBits | ||||
|   val coreMaxAddrBits = paddrBits max vaddrBitsExtended | ||||
|   val nCustomMrwCsrs = p(NCustomMRWCSRs) | ||||
|   val roccCsrs = if (p(BuildRoCC).isEmpty) Nil | ||||
|     else p(BuildRoCC).flatMap(_.csrs) | ||||
|   val nRoccCsrs = p(RoccNCSRs) | ||||
|   val nCores = p(NTiles) | ||||
|  | ||||
|   // With RVC, fetchWidth is doubled but coreInstBytes is halved | ||||
| @@ -499,7 +496,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { | ||||
|   csr.io.prci <> io.prci | ||||
|   io.fpu.fcsr_rm := csr.io.fcsr_rm | ||||
|   csr.io.fcsr_flags := io.fpu.fcsr_flags | ||||
|   io.rocc.csr <> csr.io.rocc.csr | ||||
|   csr.io.rocc.interrupt <> io.rocc.interrupt | ||||
|   csr.io.pc := wb_reg_pc | ||||
|   csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata) | ||||
|   | ||||
| @@ -18,7 +18,6 @@ case class RoccParameters( | ||||
|   generator: Parameters => RoCC, | ||||
|   nMemChannels: Int = 0, | ||||
|   nPTWPorts : Int = 0, | ||||
|   csrs: Seq[Int] = Nil, | ||||
|   useFPU: Boolean = false) | ||||
|  | ||||
| abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null) | ||||
| @@ -70,12 +69,10 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null) | ||||
|       val rocc = accelParams.generator(p.alterPartial({ | ||||
|         case RoccNMemChannels => accelParams.nMemChannels | ||||
|         case RoccNPTWPorts => accelParams.nPTWPorts | ||||
|         case RoccNCSRs => accelParams.csrs.size | ||||
|       })) | ||||
|       val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) | ||||
|       rocc.io.cmd <> cmdRouter.io.out(i) | ||||
|       rocc.io.exception := core.io.rocc.exception | ||||
|       rocc.io.host_id := io.prci.id | ||||
|       dcIF.io.requestor <> rocc.io.mem | ||||
|       dcPorts += dcIF.io.cache | ||||
|       uncachedArbPorts += rocc.io.autl | ||||
| @@ -101,18 +98,6 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null) | ||||
|     core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) | ||||
|     respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) | ||||
|  | ||||
|     if (p(RoccNCSRs) > 0) { | ||||
|       core.io.rocc.csr.rdata <> roccs.flatMap(_.io.csr.rdata) | ||||
|       for ((rocc, accelParams) <- roccs.zip(buildRocc)) { | ||||
|         rocc.io.csr.waddr := core.io.rocc.csr.waddr | ||||
|         rocc.io.csr.wdata := core.io.rocc.csr.wdata | ||||
|         rocc.io.csr.wen := core.io.rocc.csr.wen && | ||||
|           accelParams.csrs | ||||
|             .map(core.io.rocc.csr.waddr === UInt(_)) | ||||
|             .reduce((a, b) => a || b) | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     ptwPorts ++= roccs.flatMap(_.io.ptw) | ||||
|     uncachedPorts ++= roccs.flatMap(_.io.utl) | ||||
|   } | ||||
|   | ||||
| @@ -1,535 +0,0 @@ | ||||
| package uncore.devices | ||||
|  | ||||
| import Chisel._ | ||||
| import cde.{Parameters, Field} | ||||
| import junctions._ | ||||
| import junctions.NastiConstants._ | ||||
| import uncore.tilelink._ | ||||
| import uncore.Util._ | ||||
|  | ||||
| case object NDmaTransactors extends Field[Int] | ||||
| case object NDmaXacts extends Field[Int] | ||||
| case object NDmaClients extends Field[Int] | ||||
|  | ||||
| trait HasDmaParameters { | ||||
|   implicit val p: Parameters | ||||
|   val nDmaTransactors = p(NDmaTransactors) | ||||
|   val nDmaXacts = p(NDmaXacts) | ||||
|   val nDmaClients = p(NDmaClients) | ||||
|   val dmaXactIdBits = log2Up(nDmaXacts) | ||||
|   val dmaClientIdBits = log2Up(nDmaClients) | ||||
|   val addrBits = p(PAddrBits) | ||||
|   val dmaStatusBits = 2 | ||||
|   val dmaWordSizeBits = 2 | ||||
| } | ||||
|  | ||||
| abstract class DmaModule(implicit val p: Parameters) extends Module with HasDmaParameters | ||||
| abstract class DmaBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasDmaParameters | ||||
|  | ||||
| class DmaRequest(implicit p: Parameters) extends DmaBundle()(p) { | ||||
|   val xact_id = UInt(width = dmaXactIdBits) | ||||
|   val client_id = UInt(width = dmaClientIdBits) | ||||
|   val cmd = UInt(width = DmaRequest.DMA_CMD_SZ) | ||||
|   val source = UInt(width = addrBits) | ||||
|   val dest = UInt(width = addrBits) | ||||
|   val length = UInt(width = addrBits) | ||||
|   val size = UInt(width = dmaWordSizeBits) | ||||
| } | ||||
|  | ||||
| class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) { | ||||
|   val xact_id = UInt(width = dmaXactIdBits) | ||||
|   val client_id = UInt(width = dmaClientIdBits) | ||||
|   val status = UInt(width = dmaStatusBits) | ||||
| } | ||||
|  | ||||
| object DmaRequest { | ||||
|   val DMA_CMD_SZ = 3 | ||||
|  | ||||
|   val DMA_CMD_COPY = UInt("b000") | ||||
|   val DMA_CMD_PFR  = UInt("b010") | ||||
|   val DMA_CMD_PFW  = UInt("b011") | ||||
|   val DMA_CMD_SIN  = UInt("b100") | ||||
|   val DMA_CMD_SOUT = UInt("b101") | ||||
|  | ||||
|   def apply(xact_id: UInt = UInt(0), | ||||
|             client_id: UInt, | ||||
|             cmd: UInt, | ||||
|             source: UInt, | ||||
|             dest: UInt, | ||||
|             length: UInt, | ||||
|             size: UInt = UInt(0))(implicit p: Parameters): DmaRequest = { | ||||
|     val req = Wire(new DmaRequest) | ||||
|     req.xact_id := xact_id | ||||
|     req.client_id := client_id | ||||
|     req.cmd := cmd | ||||
|     req.source := source | ||||
|     req.dest := dest | ||||
|     req.length := length | ||||
|     req.size := size | ||||
|     req | ||||
|   } | ||||
| } | ||||
| import DmaRequest._ | ||||
|  | ||||
| class DmaIO(implicit p: Parameters) extends DmaBundle()(p) { | ||||
|   val req = Decoupled(new DmaRequest) | ||||
|   val resp = Decoupled(new DmaResponse).flip | ||||
| } | ||||
|  | ||||
| class DmaTrackerIO(implicit p: Parameters) extends DmaBundle()(p) { | ||||
|   val dma = (new DmaIO).flip | ||||
|   val mem = new ClientUncachedTileLinkIO | ||||
|   val mmio = new NastiIO | ||||
| } | ||||
|  | ||||
| class DmaManager(outstandingCSR: Int)(implicit p: Parameters) | ||||
|     extends DmaModule()(p) | ||||
|     with HasNastiParameters | ||||
|     with HasAddrMapParameters { | ||||
|  | ||||
|   val io = new Bundle { | ||||
|     val ctrl = (new NastiIO).flip | ||||
|     val mmio = new NastiIO | ||||
|     val dma = new DmaIO | ||||
|   } | ||||
|  | ||||
|   private val wordBits = 1 << log2Up(addrBits) | ||||
|   private val wordBytes = wordBits / 8 | ||||
|   private val wordOff = log2Up(wordBytes) | ||||
|   private val wordMSB = wordOff + 2 | ||||
|  | ||||
|   val s_idle :: s_wdata :: s_dma_req :: s_wresp :: Nil = Enum(Bits(), 4) | ||||
|   val state = Reg(init = s_idle) | ||||
|  | ||||
|   val nCtrlWords = (addrBits * 4) / nastiXDataBits | ||||
|   val ctrl_regs = Reg(Vec(nCtrlWords, UInt(width = nastiXDataBits))) | ||||
|   val ctrl_idx = Reg(UInt(width = log2Up(nCtrlWords))) | ||||
|   val ctrl_done = Reg(Bool()) | ||||
|   val ctrl_blob = ctrl_regs.asUInt | ||||
|   val ctrl_id = Reg(UInt(width = nastiXIdBits)) | ||||
|  | ||||
|   val sizeOffset = 3 * addrBits | ||||
|   val cmdOffset = sizeOffset + dmaWordSizeBits | ||||
|  | ||||
|   val dma_req = new DmaRequest().fromBits(ctrl_blob) | ||||
|   val dma_busy = Reg(init = UInt(0, nDmaXacts)) | ||||
|   val dma_xact_id = PriorityEncoder(~dma_busy) | ||||
|  | ||||
|   when (io.ctrl.aw.fire()) { | ||||
|     ctrl_id := io.ctrl.aw.bits.id | ||||
|     ctrl_idx := UInt(0) | ||||
|     ctrl_done := Bool(false) | ||||
|     state := s_wdata | ||||
|   } | ||||
|  | ||||
|   when (io.ctrl.w.fire()) { | ||||
|     when (!ctrl_done) { | ||||
|       ctrl_regs(ctrl_idx) := io.ctrl.w.bits.data | ||||
|       ctrl_idx := ctrl_idx + UInt(1) | ||||
|     } | ||||
|     when (ctrl_idx === UInt(nCtrlWords - 1)) { ctrl_done := Bool(true) } | ||||
|     when (io.ctrl.w.bits.last) { state := s_dma_req } | ||||
|   } | ||||
|  | ||||
|   dma_busy := (dma_busy | | ||||
|     Mux(io.dma.req.fire(), UIntToOH(dma_xact_id), UInt(0))) & | ||||
|     ~Mux(io.dma.resp.fire(), UIntToOH(io.dma.resp.bits.xact_id), UInt(0)) | ||||
|  | ||||
|   when (io.dma.req.fire()) { state := s_wresp } | ||||
|   when (io.ctrl.b.fire()) { state := s_idle } | ||||
|  | ||||
|   io.ctrl.ar.ready := Bool(false) | ||||
|   io.ctrl.aw.ready := (state === s_idle) | ||||
|   io.ctrl.w.ready := (state === s_wdata) | ||||
|  | ||||
|   io.ctrl.r.valid := Bool(false) | ||||
|   io.ctrl.b.valid := (state === s_wresp) | ||||
|   io.ctrl.b.bits := NastiWriteResponseChannel(id = ctrl_id) | ||||
|  | ||||
|   io.dma.req.valid := (state === s_dma_req) && !dma_busy.andR | ||||
|   io.dma.req.bits := dma_req | ||||
|   io.dma.req.bits.xact_id := dma_xact_id | ||||
|  | ||||
|   val resp_waddr_pending = Reg(init = Bool(false)) | ||||
|   val resp_wdata_pending = Reg(init = Bool(false)) | ||||
|   val resp_wresp_pending = Reg(init = Bool(false)) | ||||
|   val resp_pending = resp_waddr_pending || resp_wdata_pending || resp_wresp_pending | ||||
|  | ||||
|   val resp_client_id = Reg(UInt(width = dmaClientIdBits)) | ||||
|   val resp_status = Reg(UInt(width = dmaStatusBits)) | ||||
|  | ||||
|   io.dma.resp.ready := !resp_pending | ||||
|  | ||||
|   when (io.dma.resp.fire()) { | ||||
|     resp_client_id := io.dma.resp.bits.client_id | ||||
|     resp_status := io.dma.resp.bits.status | ||||
|     resp_waddr_pending := Bool(true) | ||||
|     resp_wdata_pending := Bool(true) | ||||
|     resp_wresp_pending := Bool(true) | ||||
|   } | ||||
|  | ||||
|   val addrTable = Vec.tabulate(nDmaClients) { i => | ||||
|     // Formerly: UInt(addrMap(s"conf:csr$i").start + outstandingCSR * csrDataBytes) | ||||
|     require(false, "CSR MMIO ports no longer exist") | ||||
|     UInt(0) | ||||
|   } | ||||
|  | ||||
|   io.mmio.ar.valid := Bool(false) | ||||
|   io.mmio.aw.valid := resp_waddr_pending | ||||
|   io.mmio.aw.bits := NastiWriteAddressChannel( | ||||
|     id = UInt(0), | ||||
|     addr = addrTable(resp_client_id), | ||||
|     size = { require(false, "CSR MMIO ports no longer exist"); UInt(0) }) | ||||
|   io.mmio.w.valid := resp_wdata_pending | ||||
|   io.mmio.w.bits := NastiWriteDataChannel(data = resp_status) | ||||
|   io.mmio.b.ready := resp_wresp_pending | ||||
|   io.mmio.r.ready := Bool(false) | ||||
|  | ||||
|   when (io.mmio.aw.fire()) { resp_waddr_pending := Bool(false) } | ||||
|   when (io.mmio.w.fire()) { resp_wdata_pending := Bool(false) } | ||||
|   when (io.mmio.b.fire()) { resp_wresp_pending := Bool(false) } | ||||
| } | ||||
|  | ||||
| class DmaEngine(outstandingCSR: Int)(implicit p: Parameters) extends DmaModule()(p) { | ||||
|   val io = new Bundle { | ||||
|     val ctrl = (new NastiIO).flip | ||||
|     val mem = new ClientUncachedTileLinkIO | ||||
|     val mmio = new NastiIO | ||||
|   } | ||||
|  | ||||
|   val manager = Module(new DmaManager(outstandingCSR)) | ||||
|   val trackers = Module(new DmaTrackerFile) | ||||
|  | ||||
|   manager.io.ctrl <> io.ctrl | ||||
|   trackers.io.dma <> manager.io.dma | ||||
|  | ||||
|   val innerIOs = trackers.io.mem | ||||
|   val outerIOs = trackers.io.mmio :+ manager.io.mmio | ||||
|  | ||||
|   val innerArb = Module(new ClientUncachedTileLinkIOArbiter(innerIOs.size)) | ||||
|   innerArb.io.in <> innerIOs | ||||
|   io.mem <> innerArb.io.out | ||||
|  | ||||
|   val outerArb = Module(new NastiArbiter(outerIOs.size)) | ||||
|   outerArb.io.master <> outerIOs | ||||
|   io.mmio <> outerArb.io.slave | ||||
|  | ||||
|   assert(!io.mmio.b.valid || io.mmio.b.bits.resp === UInt(0), | ||||
|     "DmaEngine: NASTI write response error") | ||||
|  | ||||
|   assert(!io.mmio.r.valid || io.mmio.r.bits.resp === UInt(0), | ||||
|     "DmaEngine: NASTI read response error") | ||||
| } | ||||
|  | ||||
/** A bank of `nDmaTransactors` [[DmaTracker]]s behind one `DmaIO` port.
  *
  * Each tracker's TileLink and NASTI ports are exported one-to-one through
  * `io.mem` and `io.mmio`. An incoming request is steered to the
  * lowest-indexed tracker reported by the priority encoder over the
  * trackers' `req.ready` bits; responses from all trackers are merged with
  * a round-robin arbiter.
  */
class DmaTrackerFile(implicit p: Parameters) extends DmaModule()(p) {
  val io = new Bundle {
    val dma = (new DmaIO).flip
    val mem = Vec(nDmaTransactors, new ClientUncachedTileLinkIO)
    val mmio = Vec(nDmaTransactors, new NastiIO)
  }

  val trackers = List.fill(nDmaTransactors) { Module(new DmaTracker) }
  // One bit per tracker: set when that tracker can accept a new request.
  val reqReadys = trackers.map(_.io.dma.req.ready).asUInt

  // Export every tracker's memory-side ports at the matching vector index.
  for ((tracker, idx) <- trackers.zipWithIndex) {
    io.mem(idx) <> tracker.io.mem
    io.mmio(idx) <> tracker.io.mmio
  }

  if (nDmaTransactors <= 1) {
    // Single tracker: no steering or arbitration required.
    io.dma <> trackers.head.io.dma
  } else {
    // Merge the trackers' responses onto the shared response channel.
    val respArbiter = Module(new RRArbiter(new DmaResponse, nDmaTransactors))
    respArbiter.io.in <> trackers.map(_.io.dma.resp)
    io.dma.resp <> respArbiter.io.out

    // Offer the request only to the tracker picked by the priority encoder;
    // the request payload itself is broadcast to all of them.
    val chosen = PriorityEncoder(reqReadys)
    for ((tracker, idx) <- trackers.zipWithIndex) {
      tracker.io.dma.req.valid := io.dma.req.valid && chosen === UInt(idx)
      tracker.io.dma.req.bits := io.dma.req.bits
    }
    io.dma.req.ready := reqReadys.orR
  }
}
|  | ||||
/** Executes one DMA request at a time.
  *
  * A request is accepted on `io.dma.req` while the state machine is idle and
  * answered on `io.dma.resp` once the transfer has finished. Four kinds of
  * command are decoded:
  *
  *  - `DMA_CMD_COPY`: TileLink `GetBlock` from the source, `PutBlock` to the
  *    destination, realigning the data when the source and destination
  *    offsets within a block differ.
  *  - prefetch (`cmd(2, 1) === b01`): issues `PutPrefetch` or `GetPrefetch`
  *    (selected by `cmd(0)`) for each destination block.
  *  - `DMA_CMD_SIN`: NASTI reads from `source` are packed into the buffer and
  *    written to the destination with `PutBlock`.
  *  - `DMA_CMD_SOUT`: blocks fetched with `GetBlock` are unpacked and written
  *    to `dest` over NASTI.
  *
  * `data_buffer` holds two blocks ("halves"); an entry is addressed by
  * `Cat(half, beat)`.
  */
class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
    with HasTileLinkParameters with HasNastiParameters {
  val io = new DmaTrackerIO

  private val blockOffset = tlBeatAddrBits + tlByteAddrBits
  private val blockBytes = tlDataBeats * tlDataBytes

  // Two-block staging buffer, one entry per TileLink beat.
  val data_buffer = Reg(Vec(2 * tlDataBeats, Bits(width = tlDataBits)))
  // One bit per data_buffer entry, set while the Get for that beat is still
  // outstanding. The width has to be spelled out with `width =`: a bare Int
  // argument to UInt(...) is taken as a literal value, which would leave the
  // register narrower than the 2 * tlDataBeats bits that are set and cleared
  // below.
  val get_inflight = Reg(UInt(width = 2 * tlDataBeats))
  val put_inflight = Reg(Bool())
  val put_half = Reg(UInt(width = 1))
  val get_half = Reg(UInt(width = 1))
  val prefetch_put = Reg(Bool())
  val get_done = !get_inflight.orR

  val src_block = Reg(UInt(width = tlBlockAddrBits))
  val dst_block = Reg(UInt(width = tlBlockAddrBits))
  // Byte offset of the destination inside its first block.
  val offset    = Reg(UInt(width = blockOffset))
  // Absolute difference between source and destination block offsets.
  val alignment = Reg(UInt(width = blockOffset))
  // True when the source offset is below the destination offset.
  val shift_dir = Reg(Bool())

  val bytes_left = Reg(UInt(width = addrBits))
  val streaming = Reg(Bool())
  val stream_addr = Reg(UInt(width = nastiXAddrBits))
  val stream_len = Reg(UInt(width = nastiXLenBits))
  val stream_size = Reg(UInt(width = nastiXSizeBits))
  // Byte position inside the block currently being streamed.
  val stream_idx = Reg(UInt(width = blockOffset))
  // Byte-enable pattern for one stream word: 1, 2 or 4 bytes for sizes 0..2,
  // all eight bytes otherwise.
  val stream_bytesel = MuxLookup(stream_size, UInt("b11111111"), Seq(
    UInt("b00") -> UInt("b00000001"),
    UInt("b01") -> UInt("b00000011"),
    UInt("b10") -> UInt("b00001111")))
  val stream_mask = FillInterleaved(8, stream_bytesel)
  val stream_last = Reg(Bool())

  val stream_word_bytes = UInt(1) << stream_size
  val stream_beat_idx = stream_idx(blockOffset - 1, tlByteAddrBits)
  val stream_byte_idx = stream_idx(tlByteAddrBits - 1, 0)
  val stream_bitshift = Cat(stream_byte_idx, UInt(0, 3))
  // Incoming NASTI word merged into the addressed beat, leaving the bytes
  // outside the word untouched.
  val stream_in_beat =
    (((io.mmio.r.bits.data & stream_mask) << stream_bitshift)) |
    (data_buffer(stream_beat_idx) & ~(stream_mask << stream_bitshift))
  val stream_out_word = data_buffer(stream_beat_idx) >> stream_bitshift
  val stream_out_last = bytes_left === stream_word_bytes

  val acq = io.mem.acquire.bits
  val gnt = io.mem.grant.bits

  val (s_idle :: s_get :: s_put :: s_prefetch ::
       s_stream_read_req :: s_stream_read_resp ::
       s_stream_write_req :: s_stream_write_data :: s_stream_write_resp ::
       s_wait :: s_resp :: Nil) = Enum(Bits(), 11)
  val state = Reg(init = s_idle)

  // Counts the beats of the PutBlock currently being sent.
  val (put_beat, put_done) = Counter(
    io.mem.acquire.fire() && acq.hasData(), tlDataBeats)

  // Write mask for the current put beat: a byte is enabled when its position
  // in the block lies in [offset, bytes_left).
  val put_mask = Seq.tabulate(tlDataBytes) { i =>
    val byte_index = Cat(put_beat, UInt(i, tlByteAddrBits))
    byte_index >= offset && byte_index < bytes_left
  }.asUInt

  val prefetch_sent = io.mem.acquire.fire() && io.mem.acquire.bits.isPrefetch()
  // One bit per client transaction id with a prefetch still outstanding.
  val prefetch_busy = Reg(init = UInt(0, tlMaxClientXacts))
  val (prefetch_id, _) = Counter(prefetch_sent, tlMaxClientXacts)

  val base_index = Cat(put_half, put_beat)
  val put_data = Wire(init = Bits(0, tlDataBits))
  val beat_align = alignment(blockOffset - 1, tlByteAddrBits)
  val bit_align = Cat(alignment(tlByteAddrBits - 1, 0), UInt(0, 3))
  val rev_align = UInt(tlDataBits) - bit_align

  def getBit(value: UInt, sel: UInt): Bool =
    (value >> sel)(0)

  // Select the data for the current put beat. With a non-zero alignment the
  // beat is assembled from one or two buffer entries, shifted by whole beats
  // (beat_align) and by bits within a beat (bit_align).
  when (alignment === UInt(0)) {
    put_data := data_buffer(base_index)
  } .elsewhen (shift_dir) {
    val shift_index = base_index - beat_align
    when (bit_align === UInt(0)) {
      put_data := data_buffer(shift_index)
    } .otherwise {
      val upper_bits = data_buffer(shift_index)
      val lower_bits = data_buffer(shift_index - UInt(1))
      val upper_shifted = upper_bits << bit_align
      val lower_shifted = lower_bits >> rev_align
      put_data := upper_shifted | lower_shifted
    }
  } .otherwise {
    val shift_index = base_index + beat_align
    when (bit_align === UInt(0)) {
      put_data := data_buffer(shift_index)
    } .otherwise {
      val upper_bits = data_buffer(shift_index + UInt(1))
      val lower_bits = data_buffer(shift_index)
      val upper_shifted = upper_bits << rev_align
      val lower_shifted = lower_bits >> bit_align
      put_data := upper_shifted | lower_shifted
    }
  }

  val put_acquire = PutBlock(
    client_xact_id = UInt(2),
    addr_block = dst_block,
    addr_beat = put_beat,
    data = put_data,
    wmask = Some(put_mask))

  // The transaction id carries the buffer half so that the grant handler
  // knows where to store the returned beats.
  val get_acquire = GetBlock(
    client_xact_id = get_half,
    addr_block = src_block,
    alloc = Bool(false))

  val prefetch_acquire = Mux(prefetch_put,
    PutPrefetch(client_xact_id = prefetch_id, addr_block = dst_block),
    GetPrefetch(client_xact_id = prefetch_id, addr_block = dst_block))

  val resp_xact_id = Reg(UInt(width = dmaXactIdBits))
  val resp_client_id = Reg(UInt(width = dmaClientIdBits))

  // Puts are held back until every outstanding Get beat has arrived; a
  // prefetch is held back while its transaction id is still busy.
  io.mem.acquire.valid := (state === s_get) ||
                          (state === s_put && get_done) ||
                          (state === s_prefetch && !prefetch_busy(prefetch_id))
  io.mem.acquire.bits := MuxLookup(
    state, prefetch_acquire, Seq(
      s_get -> get_acquire,
      s_put -> put_acquire))
  io.mem.grant.ready := Bool(true)
  io.dma.req.ready := state === s_idle
  io.dma.resp.valid := state === s_resp
  io.dma.resp.bits.xact_id := resp_xact_id
  io.dma.resp.bits.client_id := resp_client_id
  io.dma.resp.bits.status := UInt(0)
  io.mmio.ar.valid := (state === s_stream_read_req)
  io.mmio.ar.bits := NastiReadAddressChannel(
    id = UInt(0),
    addr = stream_addr,
    size = stream_size,
    len  = stream_len,
    burst = BURST_FIXED)
  io.mmio.r.ready := (state === s_stream_read_resp)

  io.mmio.aw.valid := (state === s_stream_write_req)
  io.mmio.aw.bits := NastiWriteAddressChannel(
    id = UInt(0),
    addr = stream_addr,
    size = stream_size,
    len  = stream_len,
    burst = BURST_FIXED)
  io.mmio.w.valid := (state === s_stream_write_data) && get_done
  io.mmio.w.bits := NastiWriteDataChannel(
    data = stream_out_word,
    last = stream_out_last)
  io.mmio.b.ready := (state === s_stream_write_resp)

  // Latch a new request and pick the first state for its command.
  when (io.dma.req.fire()) {
    val src_off = io.dma.req.bits.source(blockOffset - 1, 0)
    val dst_off = io.dma.req.bits.dest(blockOffset - 1, 0)
    val direction = src_off < dst_off

    resp_xact_id := io.dma.req.bits.xact_id
    resp_client_id := io.dma.req.bits.client_id
    src_block := io.dma.req.bits.source(addrBits - 1, blockOffset)
    dst_block := io.dma.req.bits.dest(addrBits - 1, blockOffset)
    alignment := Mux(direction, dst_off - src_off, src_off - dst_off)
    shift_dir := direction
    offset := dst_off
    // Counted from the start of the first destination block, hence + dst_off.
    bytes_left := io.dma.req.bits.length + dst_off
    get_inflight := UInt(0)
    put_inflight := Bool(false)
    get_half := UInt(0)
    put_half := UInt(0)
    streaming := Bool(false)
    stream_len := (io.dma.req.bits.length >> io.dma.req.bits.size) - UInt(1)
    stream_size := io.dma.req.bits.size
    stream_last := Bool(false)

    // The assignments below override the defaults above for their command.
    when (io.dma.req.bits.cmd === DMA_CMD_COPY) {
      state := s_get
    } .elsewhen (io.dma.req.bits.cmd(2, 1) === UInt("b01")) {
      prefetch_put := io.dma.req.bits.cmd(0)
      state := s_prefetch
    } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SIN) {
      stream_addr := io.dma.req.bits.source
      stream_idx := dst_off
      streaming := Bool(true)
      alignment := UInt(0)
      state := s_stream_read_req
    } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SOUT) {
      stream_addr := io.dma.req.bits.dest
      stream_idx := src_off
      streaming := Bool(true)
      bytes_left := io.dma.req.bits.length
      state := s_stream_write_req
    }
  }

  when (io.mmio.ar.fire()) { state := s_stream_read_resp }

  // Stream in: store each NASTI word; once the block is full or the burst
  // ends, write the block out with a put.
  when (io.mmio.r.fire()) {
    data_buffer(stream_beat_idx) := stream_in_beat
    stream_idx := stream_idx + stream_word_bytes
    val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
    when (block_finished || io.mmio.r.bits.last) { state := s_put }
  }

  when (io.mmio.aw.fire()) { state := s_get }

  // Stream out: send one word per beat; fetch the next block when the
  // current one is exhausted, or wait for the write response after the
  // final word.
  when (io.mmio.w.fire()) {
    stream_idx := stream_idx + stream_word_bytes
    bytes_left := bytes_left - stream_word_bytes
    val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
    when (stream_out_last) {
      state := s_stream_write_resp
    } .elsewhen (block_finished) {
      state := s_get
    }
  }

  when (io.mmio.b.fire()) { state := s_resp }

  // A GetBlock was accepted: mark every beat of the target half as in flight.
  when (state === s_get && io.mem.acquire.ready) {
    get_inflight := get_inflight | FillInterleaved(tlDataBeats, UIntToOH(get_half))
    src_block := src_block + UInt(1)
    when (streaming) {
      state := s_stream_write_data
    } .otherwise {
      val bytes_in_buffer = UInt(blockBytes) - alignment
      val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off
                       get_half === UInt(0) && // this is the first block
                       bytes_in_buffer < bytes_left // there is still more data left to fetch
      get_half := get_half + UInt(1)
      when (!extra_read) { state := s_put }
    }
  }

  // A prefetch was accepted: mark its id busy and advance to the next block,
  // or finish when less than one block remains.
  when (prefetch_sent) {
    prefetch_busy := prefetch_busy | UIntToOH(prefetch_id)
    when (bytes_left < UInt(blockBytes)) {
      bytes_left := UInt(0)
      state := s_resp
    } .otherwise {
      bytes_left := bytes_left - UInt(blockBytes)
      dst_block := dst_block + UInt(1)
    }
  }

  // Grant handling: prefetch acks free their id, data grants fill the buffer
  // entry named by (half, beat), and any other grant acknowledges the put.
  when (io.mem.grant.fire()) {
    when (gnt.g_type === Grant.prefetchAckType) {
      prefetch_busy := prefetch_busy & ~UIntToOH(gnt.client_xact_id)
    } .elsewhen (gnt.hasData()) {
      val write_half = gnt.client_xact_id(0)
      val write_idx = Cat(write_half, gnt.addr_beat)
      get_inflight := get_inflight & ~UIntToOH(write_idx)
      data_buffer(write_idx) := gnt.data
    } .otherwise {
      put_inflight := Bool(false)
    }
  }

  when (put_done) { // state === s_put
    when (!streaming) {
      put_half := put_half + UInt(1)
    }
    offset := UInt(0)
    stream_idx := UInt(0)
    when (bytes_left < UInt(blockBytes)) {
      bytes_left := UInt(0)
    } .otherwise {
      bytes_left := bytes_left - UInt(blockBytes)
    }
    put_inflight := Bool(true)
    dst_block := dst_block + UInt(1)
    state := s_wait
  }

  // Once all gets have returned and the put is acknowledged, either finish,
  // resume reading the stream, or fetch the next block.
  when (state === s_wait && get_done && !put_inflight) {
    state := MuxCase(s_get, Seq(
      (bytes_left === UInt(0)) -> s_resp,
      streaming -> s_stream_read_resp))
  }

  when (io.dma.resp.fire()) { state := s_idle }
}
		Reference in New Issue
	
	Block a user