From 305185c034336f08ca467dfcaf4113ee416e5783 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 14 Jan 2016 11:37:58 -0800 Subject: [PATCH] send DMA requests through MMIO and get responses through CSRs --- rocket/src/main/scala/csr.scala | 12 +- rocket/src/main/scala/dma.scala | 210 +++++++++++++---------- rocket/src/main/scala/instructions.scala | 1 + rocket/src/main/scala/rocc.scala | 14 +- rocket/src/main/scala/rocket.scala | 4 + rocket/src/main/scala/tile.scala | 37 ++-- rocket/src/main/scala/tlb.scala | 24 +++ 7 files changed, 189 insertions(+), 113 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a3873c6b..a1c277f9 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -252,12 +252,16 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } for (i <- 0 until nCustomMrwCsrs) { - val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? - require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") + val addr = CSRs.mrwbase + i require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") read_mapping += addr -> io.custom_mrw_csrs(i) } + for ((addr, i) <- roccCsrs.zipWithIndex) { + require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use") + read_mapping += addr -> io.rocc.csr.rdata(i) + } + val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr) val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } @@ -449,6 +453,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } } + io.rocc.csr.waddr := addr + io.rocc.csr.wdata := wdata + io.rocc.csr.wen := wen + when(this.reset) { reg_mstatus.zero1 := 0 reg_mstatus.zero2 := 0 diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index c2a02bec..24240c18 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -71,19 +71,23 @@ class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) { val resp = Valid(new ClientDmaResponse).flip } -class DmaFrontend(implicit val p: Parameters) - extends Module with HasClientDmaParameters { +class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) + with HasClientDmaParameters with HasTileLinkParameters { val io = new Bundle { val cpu = (new ClientDmaIO).flip - val dma = new DmaIO + val mem = new ClientUncachedTileLinkIO val ptw = new TLBPTWIO val busy = Bool(OUTPUT) + val incr_outstanding = Bool(OUTPUT) + val host_id = UInt(INPUT, log2Up(nCores)) } - private val pgSize = 1 << pgIdxBits + val tlb = Module(new DecoupledTLB()(p.alterPartial({ + case CacheName => "L1D" + }))) + io.ptw <> tlb.io.ptw - val priv = Mux(io.ptw.status.mprv, io.ptw.status.prv1, io.ptw.status.prv) - val vm_enabled = io.ptw.status.vm(3) && priv <= UInt(PRV_S) + private val pgSize = 1 << pgIdxBits val cmd = Reg(UInt(width = DMA_CMD_SZ)) val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq( @@ -112,17 +116,17 @@ class DmaFrontend(implicit val p: Parameters) val src_ppn = Reg(UInt(width = ppnBits)) val dst_ppn = Reg(UInt(width = ppnBits)) - val src_paddr = Mux(vm_enabled, Cat(src_ppn, src_idx), src_vaddr) - val dst_paddr = Mux(vm_enabled, Cat(dst_ppn, dst_idx), dst_vaddr) + val src_paddr = Cat(src_ppn, src_idx) + val dst_paddr = Cat(dst_ppn, dst_idx) val last_src_vpn = Reg(UInt(width = vpnBits)) val last_dst_vpn = Reg(UInt(width = vpnBits)) - val tx_len = Mux(!vm_enabled, bytes_left, - Util.minUInt(src_pglen, dst_pglen, bytes_left)) + val tx_len = Util.minUInt(src_pglen, dst_pglen, bytes_left) - val (dma_xact_id, _) = Counter(io.dma.req.fire(), nDmaXactsPerClient) - val dma_busy = Reg(init = UInt(0, nDmaXactsPerClient)) + val dma_busy = Reg(init = UInt(0, tlMaxClientXacts)) + val dma_xact_id = PriorityEncoder(~dma_busy) + val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats) val (s_idle :: s_translate :: s_dma_req :: s_dma_update :: s_prepare :: s_finish :: Nil) = Enum(Bits(), 6) @@ -130,49 +134,80 @@ class DmaFrontend(implicit val p: Parameters) // lower bit is for src, higher bit is for dst val to_translate = Reg(init = UInt(0, 2)) - val ptw_sent = Reg(init = UInt(0, 2)) - val ptw_to_send = to_translate & ~ptw_sent - val ptw_resp_id = Reg(init = UInt(0, 1)) + val tlb_sent = Reg(init = UInt(0, 2)) + val tlb_to_send = to_translate & ~tlb_sent val resp_status = Reg(UInt(width = dmaStatusBits)) - io.ptw.req.valid := ptw_to_send.orR && vm_enabled - io.ptw.req.bits.addr := Mux(ptw_to_send(0), src_vpn, dst_vpn) - io.ptw.req.bits.prv := io.ptw.status.prv - io.ptw.req.bits.store := !ptw_to_send(0) // storing to destination - io.ptw.req.bits.fetch := Bool(true) + def make_acquire( + addr_beat: UInt, client_xact_id: UInt, client_id: UInt, + cmd: UInt, source: UInt, dest: UInt, + length: UInt, size: UInt): Acquire = { - when (io.ptw.req.fire()) { - ptw_sent := ptw_sent | PriorityEncoderOH(ptw_to_send) + val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits)) + data_blob := DmaRequest( + xact_id = UInt(0), + client_id = client_id, + cmd = cmd, + source = source, + dest = dest, + length = length, + size = size).toBits + val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob) + val base_addr = addrMap("devices:dma").start + val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits)) + + PutBlock( + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data_beats(addr_beat), + alloc = Bool(false)) } - when (io.ptw.resp.valid) { - when (io.ptw.resp.bits.error) { + tlb.io.req.valid := tlb_to_send.orR + tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn) + tlb.io.req.bits.passthrough := Bool(false) + tlb.io.req.bits.instruction := Bool(false) + tlb.io.req.bits.store := !tlb_to_send(0) + tlb.io.resp.ready := tlb_sent.orR + + when (tlb.io.req.fire()) { + tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send) + } + + when (tlb.io.resp.fire()) { + val recv_choice = PriorityEncoderOH(to_translate) + val error = Mux(recv_choice(0), + tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st) + + when (error) { resp_status := ClientDmaResponse.pagefault state := s_finish } - val recv_choice = PriorityEncoderOH(to_translate) - to_translate := to_translate & ~recv_choice // getting the src translation when (recv_choice(0)) { - src_ppn := io.ptw.resp.bits.pte.ppn + src_ppn := tlb.io.resp.bits.ppn } .otherwise { - dst_ppn := io.ptw.resp.bits.pte.ppn + dst_ppn := tlb.io.resp.bits.ppn } + + to_translate := to_translate & ~recv_choice } io.cpu.req.ready := state === s_idle io.cpu.resp.valid := state === s_finish io.cpu.resp.bits := ClientDmaResponse(resp_status) - io.dma.req.valid := state === s_dma_req && !dma_busy(dma_xact_id) - io.dma.req.bits := DmaRequest( + + io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR + io.mem.acquire.bits := make_acquire( + addr_beat = dma_req_beat, + client_id = io.host_id, client_xact_id = dma_xact_id, - cmd = cmd, - source = src_paddr, - dest = dst_paddr, - length = tx_len, - size = word_size) - io.dma.resp.ready := Bool(true) + cmd = cmd, source = src_paddr, dest = dst_paddr, + length = tx_len, size = word_size) + + io.mem.grant.ready := (state =/= s_dma_req) when (io.cpu.req.fire()) { val req = io.cpu.req.bits @@ -187,24 +222,23 @@ class DmaFrontend(implicit val p: Parameters) bytes_left := req.segment_size word_size := req.word_size to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11")) - ptw_sent := UInt(0) - state := Mux(vm_enabled, s_translate, s_dma_req) + tlb_sent := UInt(0) + state := s_translate } when (state === s_translate && !to_translate.orR) { state := s_dma_req } - def setBusyOnSend(req: DecoupledIO[DmaRequest]): UInt = - Mux(req.fire(), UIntToOH(req.bits.client_xact_id), UInt(0)) + def setBusy(set: Bool, xact_id: UInt): UInt = + Mux(set, UIntToOH(xact_id), UInt(0)) - def clearBusyOnRecv(resp: DecoupledIO[DmaResponse]): UInt = - ~Mux(resp.fire(), UIntToOH(resp.bits.client_xact_id), UInt(0)) + dma_busy := (dma_busy | + setBusy(dma_req_done, dma_xact_id)) & + ~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id) - dma_busy := (dma_busy | setBusyOnSend(io.dma.req)) & - clearBusyOnRecv(io.dma.resp) - when (io.dma.req.fire()) { + when (dma_req_done) { src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0)) dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0)) bytes_left := bytes_left - tx_len @@ -223,11 +257,11 @@ class DmaFrontend(implicit val p: Parameters) dst_vaddr := dst_vaddr + dst_stride bytes_left := segment_size segments_left := segments_left - UInt(1) - state := Mux(vm_enabled, s_prepare, s_dma_req) + state := s_prepare } } .otherwise { to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0)) - ptw_sent := UInt(0) + tlb_sent := UInt(0) state := s_translate } } @@ -236,13 +270,14 @@ class DmaFrontend(implicit val p: Parameters) to_translate := adv_ptr & Cat( dst_vpn =/= last_dst_vpn, src_vpn =/= last_src_vpn) - ptw_sent := UInt(0) + tlb_sent := UInt(0) state := s_translate } when (state === s_finish) { state := s_idle } io.busy := (state =/= s_idle) || dma_busy.orR + io.incr_outstanding := dma_req_done } object DmaCtrlRegNumbers { @@ -252,19 +287,23 @@ object DmaCtrlRegNumbers { val NSEGMENTS = 3 val WORD_SIZE = 4 val RESP_STATUS = 5 + val OUTSTANDING = 6 + val NCSRS = 7 + val CSR_BASE = 0x800 + val CSR_END = CSR_BASE + NCSRS } import DmaCtrlRegNumbers._ -class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { +class DmaCtrlRegFile(implicit val p: Parameters) extends Module + with HasClientDmaParameters with HasTileLinkParameters { + private val nWriteRegs = 5 - private val nReadRegs = 1 - private val nRegs = nWriteRegs + nReadRegs + private val nRegs = nWriteRegs + 2 val io = new Bundle { val wen = Bool(INPUT) - val addr = UInt(INPUT, log2Up(nRegs)) + val waddr = UInt(INPUT, log2Up(nRegs)) val wdata = UInt(INPUT, dmaSegmentSizeBits) - val rdata = UInt(OUTPUT, dmaSegmentSizeBits) val src_stride = UInt(OUTPUT, dmaSegmentSizeBits) val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits) @@ -272,10 +311,12 @@ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { val nsegments = UInt(OUTPUT, dmaSegmentBits) val word_size = UInt(OUTPUT, dmaWordSizeBits) - val status = UInt(INPUT, dmaStatusBits) + val incr_outstanding = Bool(INPUT) + val xact_outstanding = Bool(OUTPUT) } val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits))) + val waddr = io.waddr(log2Up(NCSRS) - 1, 0) io.src_stride := regs(SRC_STRIDE) io.dst_stride := regs(DST_STRIDE) @@ -283,44 +324,48 @@ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { io.nsegments := regs(NSEGMENTS) io.word_size := regs(WORD_SIZE) - when (io.wen && io.addr < UInt(nWriteRegs)) { - regs.write(io.addr, io.wdata) + when (io.wen && waddr < UInt(nWriteRegs)) { + regs.write(waddr, io.wdata) } - io.rdata := MuxLookup(io.addr, regs(io.addr), Seq( - UInt(RESP_STATUS) -> io.status)) + val outstanding_cnt = TwoWayCounter( + io.incr_outstanding, + io.wen && io.waddr === UInt(OUTSTANDING), + tlMaxClientXacts) + + io.xact_outstanding := outstanding_cnt > UInt(0) } class DmaController(implicit p: Parameters) extends RoCC()(p) with HasClientDmaParameters { io.mem.req.valid := Bool(false) - io.autl.acquire.valid := Bool(false) - io.autl.grant.ready := Bool(false) io.iptw.req.valid := Bool(false) io.pptw.req.valid := Bool(false) + io.resp.valid := Bool(false) + io.interrupt := Bool(false) val cmd = Queue(io.cmd) val inst = cmd.bits.inst val is_transfer = inst.funct < UInt(8) - val is_cr_write = inst.funct === UInt(8) - val is_cr_read = inst.funct === UInt(9) - val is_cr_access = is_cr_write || is_cr_read - - val resp_rd = Reg(io.resp.bits.rd) - val resp_data = Reg(io.resp.bits.data) - - val s_idle :: s_resp :: Nil = Enum(Bits(), 2) - val state = Reg(init = s_idle) val reg_status = Reg(UInt(width = dmaStatusBits)) val crfile = Module(new DmaCtrlRegFile) - crfile.io.addr := cmd.bits.rs1 - crfile.io.wdata := cmd.bits.rs2 - crfile.io.wen := cmd.fire() && is_cr_write + crfile.io.waddr := io.csr.waddr + crfile.io.wdata := io.csr.wdata + crfile.io.wen := io.csr.wen + + io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride + io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride + io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size + io.csr.rdata(NSEGMENTS) := crfile.io.nsegments + io.csr.rdata(WORD_SIZE) := crfile.io.word_size + io.csr.rdata(RESP_STATUS) := reg_status val frontend = Module(new DmaFrontend) - io.dma <> frontend.io.dma io.dptw <> frontend.io.ptw + io.autl <> frontend.io.mem + crfile.io.incr_outstanding := frontend.io.incr_outstanding + frontend.io.host_id := io.host_id frontend.io.cpu.req.valid := cmd.valid && is_transfer frontend.io.cpu.req.bits := ClientDmaRequest( cmd = cmd.bits.inst.funct, @@ -331,26 +376,11 @@ class DmaController(implicit p: Parameters) extends RoCC()(p) segment_size = crfile.io.segment_size, nsegments = crfile.io.nsegments, word_size = crfile.io.word_size) - - cmd.ready := state === s_idle && (!is_transfer || frontend.io.cpu.req.ready) - io.resp.valid := state === s_resp - io.resp.bits.rd := resp_rd - io.resp.bits.data := resp_data - - when (cmd.fire()) { - when (is_cr_read) { - resp_rd := inst.rd - resp_data := crfile.io.rdata - state := s_resp - } - } - - when (io.resp.fire()) { state := s_idle } + cmd.ready := is_transfer && frontend.io.cpu.req.ready when (frontend.io.cpu.resp.valid) { reg_status := frontend.io.cpu.resp.bits.status } - io.busy := (state =/= s_idle) || cmd.valid || frontend.io.busy - io.interrupt := Bool(false) + io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index b383e385..564b0f8d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -311,6 +311,7 @@ object CSRs { val stimehw = 0xa81 val mtimecmph = 0x361 val mtimeh = 0x741 + val mrwbase = 0x790 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 84baa909..4d0a41b3 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -9,6 +9,14 @@ import cde.{Parameters, Field} case object RoccMaxTaggedMemXacts extends Field[Int] case object RoccNMemChannels extends Field[Int] +case object RoccNCSRs extends Field[Int] + +class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) { + val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen)) + val waddr = UInt(OUTPUT, CSR.ADDRSZ) + val wdata = UInt(OUTPUT, xLen) + val wen = Bool(OUTPUT) +} class RoCCInstruction extends Bundle { @@ -33,7 +41,7 @@ class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { val data = Bits(width = xLen) } -class RoCCInterface(implicit p: Parameters) extends Bundle { +class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) @@ -50,8 +58,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle { val fpu_req = Decoupled(new FPInput) val fpu_resp = Decoupled(new FPResult).flip val exception = Bool(INPUT) - - val dma = new DmaIO + val csr = (new RoCCCSRs).flip + val host_id = UInt(INPUT, log2Up(nCores)) override def cloneType = new RoCCInterface().asInstanceOf[this.type] } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index f67302d9..0e43d162 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -46,6 +46,10 @@ trait HasCoreParameters extends HasAddrMapParameters { val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt val mmioBase = p(MMIOBase) val nCustomMrwCsrs = p(NCustomMRWCSRs) + val roccCsrs = if (p(BuildRoCC).isEmpty) Nil + else p(BuildRoCC).flatMap(_.csrs) + val nRoccCsrs = p(RoccNCSRs) + val nCores = p(HtifKey).nCores // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 233b5b8b..1c9f16a7 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -14,8 +14,8 @@ case class RoccParameters( opcodes: OpcodeSet, generator: Parameters => RoCC, nMemChannels: Int = 0, - useFPU: Boolean = false, - useDma: Boolean = false) + csrs: Seq[Int] = Nil, + useFPU: Boolean = false) abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { @@ -23,7 +23,6 @@ abstract class Tile(resetSignal: Bool = null) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size - val nDmaPorts = buildRocc.filter(_.useDma).size val nDCachePorts = 2 + nRocc val nPTWPorts = 2 + 3 * nRocc val nCachedTileLinkPorts = 1 @@ -77,12 +76,15 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( cmdRouter.io.in <> core.io.rocc.cmd val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) => - val rocc = accelParams.generator( - p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels })) + val rocc = accelParams.generator(p.alterPartial({ + case RoccNMemChannels => accelParams.nMemChannels + case RoccNCSRs => accelParams.csrs.size + })) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) rocc.io.cmd <> cmdRouter.io.out(i) rocc.io.s := core.io.rocc.s rocc.io.exception := core.io.rocc.exception + rocc.io.host_id := io.host.id dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2 + i) <> dcIF.io.cache uncachedArb.io.in(1 + i) <> rocc.io.autl @@ -107,18 +109,22 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( } } - if (nDmaPorts > 0) { - val dmaArb = Module(new DmaArbiter(nDmaPorts)) - dmaArb.io.in <> roccs.zip(buildRocc) - .filter { case (_, params) => params.useDma } - .map { case (rocc, _) => rocc.io.dma } - io.dma <> dmaArb.io.out - } - core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) + if (p(RoccNCSRs) > 0) { + core.io.rocc.csr.rdata <> roccs.map(_.io.csr.rdata).reduce(_ ++ _) + for ((rocc, accelParams) <- roccs.zip(buildRocc)) { + rocc.io.csr.waddr := core.io.rocc.csr.waddr + rocc.io.csr.wdata := core.io.rocc.csr.wdata + rocc.io.csr.wen := core.io.rocc.csr.wen && + accelParams.csrs + .map(core.io.rocc.csr.waddr === UInt(_)) + .reduce((a, b) => a || b) + } + } + roccs.flatMap(_.io.utl) :+ uncachedArb.io.out } else { Seq(icache.io.mem) }) @@ -128,9 +134,4 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( fpu.io.cp_resp.ready := Bool(false) } } - - if (!usingRocc || nDmaPorts == 0) { - io.dma.req.valid := Bool(false) - io.dma.resp.ready := Bool(false) - } } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index c76ac068..55e7359d 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -193,3 +193,27 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { state := s_ready } } + +class DecoupledTLB(implicit p: Parameters) extends Module { + val io = new Bundle { + val req = Decoupled(new TLBReq).flip + val resp = Decoupled(new TLBResp) + val ptw = new TLBPTWIO + } + + val reqq = Queue(io.req) + val tlb = Module(new TLB) + + val resp_helper = DecoupledHelper( + reqq.valid, tlb.io.req.ready, io.resp.ready) + val tlb_miss = tlb.io.resp.miss + + tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready) + tlb.io.req.bits := reqq.bits + reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss) + + io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss) + io.resp.bits := tlb.io.resp + + io.ptw <> tlb.io.ptw +}