// See LICENSE.SiFive for license details.

package sifive.fpgashells.devices.xilinx.xilinxml507mig

import Chisel._
import chisel3.core.{Input, Output}
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.subsystem.{AsynchronousCrossing, HasCrossing}
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._

// Parameters of the ML507 MIG memory port.
//
// address: the address sets occupied by the memory. XilinxML507MIGToTL
//          passes them on as the address of its TileLink manager and
//          derives the address range of the memory from them.
case class XilinxML507MIGParams(address: Seq[AddressSet])

// Black box for the external memory controller. Its name is overridden to
// "memory_controller" below; the implementation is not part of this file.
// The val names inside `io` are the interface towards that module, so they
// must not be renamed.
class MemoryController extends BlackBox {
    val io = IO(new Bundle {
        // System and DDR2 port bundles (declared elsewhere).
        // XilinxML507MIGToTL connects both straight to its own IO.
        val sys = new MemorySysIO
        val ddr2 = new MemoryDDR2IO

        // Request side. XilinxML507MIGToTL drives it as follows:
        //  - request_addr: lower 28 bits of the TL address with the lowest
        //    5 bits cleared (32 byte granularity)
        //  - request_type: high for requests that carry no data (reads)
        //  - request_data: the 256 bit TL data beat
        //  - request_mask: the inverted TL byte mask (low marks valid bytes)
        //  - request_valid / request_ready: handshake of the request
        val request_addr = Input(UInt(28.W))
        val request_type = Input(Bool())
        val request_data = Input(UInt(256.W))
        val request_mask = Input(UInt(32.W))
        val request_valid = Input(Bool())
        val request_ready = Output(Bool())

        // Response side (read data). There is no back-pressure: the user of
        // this module has to capture response_data while response_valid is
        // high.
        val response_data = Output(UInt(256.W))
        val response_valid = Output(Bool())
        // no ready, as the mig does not wait
    })

    // Use a fixed name for the black box instead of the Scala class name.
    override def desiredName: String = "memory_controller"
}

// Bookkeeping entry stored per accepted request in the ack queue of
// XilinxML507MIGToTL. It carries what is needed to build the D channel
// response later on.
class ResponseQueueIO extends Bundle {
    // True if the request carried no data (a read); such a request is
    // answered with AccessAckData instead of AccessAck.
    val read = Bool()
    // Source and size of the request (copied from the A channel and echoed
    // on the D channel). No width is given here.
    // NOTE(review): presumably the widths are inferred from the connected
    // TL signals -- confirm.
    val source = UInt()
    val size = UInt()
}

// TileLink manager in front of the ML507 memory controller black box.
//
// Accepts single-beat Get and PutFull requests of up to `beatBytes` bytes,
// hands them to the MemoryController and generates the matching
// AccessAckData/AccessAck responses in the order of the requests.
//
// c: parameters providing the address sets served by this node.
class XilinxML507MIGToTL(c: XilinxML507MIGParams)(implicit p: Parameters) extends LazyModule with HasCrossing {
    // Corresponds to MIG interface with 64 bit width and a burst length of 4
    val width = 256
    val beatBytes = width/8 // 32 byte (half a cache-line, fragmented)

    val address_range = AddressRange.fromSets(c.address).head
    require(log2Ceil(address_range.size) == 28, "Max 256MiB DIMMs supported")
    val crossing = AsynchronousCrossing(1)

    val device = new MemoryDevice
    val node = TLManagerNode(
        Seq(TLManagerPortParameters(
            Seq(TLManagerParameters(
                address         = c.address,
                resources       = device.reg,
                regionType      = RegionType.UNCACHED,
                executable      = true,
                supportsGet     = TransferSizes(1, beatBytes),
                supportsPutFull = TransferSizes(1, beatBytes),
                fifoId          = Some(0) // in-order
            )),
            beatBytes = beatBytes
        ))
    )
    // We could possibly also support supportsPutPartial, as we need support
    // for masks anyway because of the possibility of transfers smaller than
    // the data width (size signal, see below).
    // Seems we can: TL$7.3

    lazy val module = new LazyModuleImp(this) {
        val io = IO(new Bundle {
            val port_sys = new MemorySysIO
            val port_ddr2 = new MemoryDDR2IO
        })

        val controller = Module(new MemoryController)
        io.port_sys <> controller.io.sys
        io.port_ddr2 <> controller.io.ddr2

        // in: TLBundle, edge: TLEdgeIn
        val (in, edge) = node.in(0)

        // Due to the TLFragmenter placed in front of this node (see
        // XilinxML507MIG), all messages are 32 bytes or smaller. The data
        // signal of the TL channels is also 32 bytes, so all messages will
        // be transferred in a single beat.
        // Also, TL guarantees (see TL$4.6) that the payload of a data message
        // is always aligned to the width of the beat, e.g. in case of a 32
        // byte data signal, data[7:0] will always have an address ending in
        // 0b00000 and data[255:248] an address ending in 0b11111. It is also
        // guaranteed that the mask bits always correctly reflect the active
        // bytes inside the beat with respect to the size and address. So we
        // can directly forward the mask, (relative) address and data to the
        // MIG interface.

        // An AddressSet is always aligned, so we don't need to subtract the
        // base address, we can just take the lower bits. The lowest 5 bits
        // would only select a byte inside the 32 byte word of the MIG; they
        // are cleared here, as the active bytes are conveyed by the mask.
        val address = in.a.bits.address(27, 0) & "hFFFFFE0".U

        // Maximum number of requests that can be in flight at the same time.
        // The ack queue and the response queue share this depth on purpose:
        // every read waiting in the ack queue may produce one response that
        // has to fit into the response queue (see below).
        val max_in_flight = 2

        // Save the source, size and type of the requests in a queue so we
        // can synthesize the right responses in fifo order. The length also
        // determines the maximum number of in-flight requests.
        val ack_queue = Module(new Queue(new ResponseQueueIO, max_in_flight))

        // Pass data directly to the controller
        controller.io.request_addr := address
        controller.io.request_type := !edge.hasData(in.a.bits)
        controller.io.request_data := in.a.bits.data
        // TL uses high to indicate valid data while mig uses low
        controller.io.request_mask := ~ in.a.bits.mask

        ack_queue.io.enq.bits.read   := !edge.hasData(in.a.bits)
        ack_queue.io.enq.bits.source := in.a.bits.source
        ack_queue.io.enq.bits.size   := in.a.bits.size

        // We are ready when the controller and the queue input are ready
        in.a.ready := controller.io.request_ready && ack_queue.io.enq.ready
        // Both queues only latch data if the other is ready, so that data
        // is latched into both queues or not at all
        controller.io.request_valid := in.a.valid && ack_queue.io.enq.ready
        ack_queue.io.enq.valid      := in.a.valid && controller.io.request_ready


        // We have to buffer the responses from the MIG as it has no internal
        // buffer and will output its read responses only for one cycle. To
        // avoid losing any responses, this queue *must* be at least as deep
        // as the ack queue, so that we can catch all responses, even if the
        // ack queue is completely filled with read requests.
        val response_queue = Module(new Queue(controller.io.response_data, max_in_flight))

        response_queue.io.enq.bits  := controller.io.response_data
        response_queue.io.enq.valid := controller.io.response_valid
        // MIG does not support delaying a response, so enq.ready cannot be
        // used for back-pressure. A response arriving while the queue is
        // full would be lost and all further AccessAckData messages would
        // return wrong data, so flag this instead of failing silently.
        assert(!controller.io.response_valid || response_queue.io.enq.ready,
               "MIG read response lost: response queue is full")

        // Acks may or may not contain data depending on the request, but we
        // can always pass the data, even if it is invalid in the write case,
        // because it is ignored for AccessAck responses
        val response_read = ack_queue.io.deq.bits.read
        in.d.bits.opcode := Mux(response_read, TLMessages.AccessAckData, TLMessages.AccessAck)
        in.d.bits.param  := UInt(0) // reserved, must be 0
        in.d.bits.size   := ack_queue.io.deq.bits.size
        in.d.bits.source := ack_queue.io.deq.bits.source
        in.d.bits.sink   := UInt(0) // ignored
        in.d.bits.data   := response_queue.io.deq.bits
        in.d.bits.error  := Bool(false)

        // The data is valid when the ack queue data is valid (write case) or
        // when the ack *and* response queues are valid (read case)
        in.d.valid       := ack_queue.io.deq.valid && (!response_read ||
                            response_queue.io.deq.valid)
        // Let the ack queue dequeue when the master is ready (write case) or
        // when the master is ready *and* there is a valid response (read case)
        ack_queue.io.deq.ready      := in.d.ready && (!response_read ||
                                       response_queue.io.deq.valid)
        // Let the response queue dequeue when the master is ready and there
        // is a valid read ack waiting
        response_queue.io.deq.ready := in.d.ready && response_read &&
                                       ack_queue.io.deq.valid


        // Tie off unused channels
        in.b.valid := Bool(false)
        in.c.ready := Bool(true)
        in.e.ready := Bool(true)
    }
}

// Wrapper around XilinxML507MIGToTL: puts a TLFragmenter and the crossing
// provided by the island in front of it and exposes the resulting inward
// node as `node`. The sys and DDR2 ports of the island are passed through
// unchanged.
//
// c: parameters handed on to XilinxML507MIGToTL.
class XilinxML507MIG(c : XilinxML507MIGParams)(implicit p: Parameters) extends LazyModule {
    // The Fragmenter will not fragment messages <= 32 bytes, so all
    // slaves have to support this size. 64 byte specifies the maximum
    // supported transfer size that the slave side of the fragmenter supports
    // against the master (here the main memory bus). Specifying alwaysMin as
    // true results in all messages being fragmented to the minimal size
    // (32 byte). In TL1 terms, slaves correspond roughly to managers and
    // masters to clients (confusingly…).
    val fragmenter = LazyModule(new TLFragmenter(32, 64, alwaysMin=true))
    val island = LazyModule(new XilinxML507MIGToTL(c))

    // Requests pass the fragmenter first, then the crossing into the island
    // and finally reach the manager node of the island.
    val node: TLInwardNode =
        island.node := island.crossTLIn := fragmenter.node

    lazy val module = new LazyModuleImp(this) {
        val io = IO(new Bundle {
            val port_sys = new MemorySysIO
            val port_ddr2 = new MemoryDDR2IO
        })

        // Pass the ports of the island through to the outside.
        io.port_sys <> island.module.io.port_sys
        io.port_ddr2 <> island.module.io.port_ddr2

        // The MIGToTL module lives in a separate clock domain together with
        // the MIG, which is why it is called "island". Its clock and reset
        // are taken from the sys port (clk0 and reset).
        island.module.clock := io.port_sys.clk0
        island.module.reset := io.port_sys.reset
    }
}