1
0
rocket-chip/uncore/src/main/scala/memserdes.scala
2015-02-28 17:32:03 -08:00

567 lines
20 KiB
Scala

// See LICENSE for license details.
package uncore
import Chisel._
import scala.math._
// Configuration keys, each declared as an Int-valued Field.
//
// The first seven keys are not read anywhere in this part of the file. Judging by their names they
// describe address-translation geometry (address, page-index, ASID, permission and page-number
// widths) -- NOTE(review): confirm against the configuration that sets them.
case object PAddrBits extends Field[Int]
case object VAddrBits extends Field[Int]
case object PgIdxBits extends Field[Int]
case object ASIdBits extends Field[Int]
case object PermBits extends Field[Int]
case object PPNBits extends Field[Int]
case object VPNBits extends Field[Int]
// Memory-interface (MIF) keys read by the bundles and converters below.
case object MIFAddrBits extends Field[Int] // width of the `addr` field (HasMemAddr)
case object MIFDataBits extends Field[Int] // width of the `data` field (HasMemData)
case object MIFTagBits extends Field[Int] // width of the `tag` field (HasMemTag)
case object MIFDataBeats extends Field[Int] // number of data beats counted per memory transfer
/** Mixin adding a `data` field that is `MIFDataBits` wide. */
trait HasMemData extends Bundle {
  val data = Bits(width = params(MIFDataBits))
}
/** Mixin adding an `addr` field that is `MIFAddrBits` wide. */
trait HasMemAddr extends Bundle {
  val addr = UInt(width = params(MIFAddrBits))
}
/** Mixin adding a `tag` field that is `MIFTagBits` wide. */
trait HasMemTag extends Bundle {
  val tag = UInt(width = params(MIFTagBits))
}
/** Memory request command: address, tag and a read/write flag.
  *
  * Within this file `rw` is driven true for requests that carry write data and false for reads.
  */
class MemReqCmd extends HasMemAddr with HasMemTag {
  val rw = Bool()
}
// Memory response beat: a data word together with the tag of the request it answers.
class MemResp extends HasMemData with HasMemTag
// Memory write-data beat: a bare data word.
class MemData extends HasMemData
/** Memory port as seen by the requester.
  *
  * Commands and write data leave on two independent decoupled channels; responses return on a
  * flipped decoupled channel, so the requester can back-pressure them through `resp.ready`.
  */
class MemIO extends Bundle {
  val req_cmd = Decoupled(new MemReqCmd)
  val req_data = Decoupled(new MemData)
  val resp = Decoupled(new MemResp).flip
}
/** Variant of the memory port whose response channel is a flipped `Valid` rather than a
  * `Decoupled`: responses carry no `ready`, so the requester cannot back-pressure them.
  */
class MemPipeIO extends Bundle {
  val req_cmd = Decoupled(new MemReqCmd)
  val req_data = Decoupled(new MemData)
  val resp = Valid(new MemResp).flip
}
/** Narrow link carrying serialized memory traffic.
  *
  * @param w width in bits of each transferred piece, in both directions
  */
class MemSerializedIO(w: Int) extends Bundle {
  // Outgoing pieces, with ready/valid flow control.
  val req = Decoupled(Bits(width = w))
  // Incoming pieces, valid-only (no back-pressure).
  val resp = Valid(Bits(width = w)).flip
}
/** Bridges a wide MemIO port onto a `w`-bit serialized link.
  *
  * Outgoing: an accepted command or data beat is latched into `out_buf` and then shifted out over
  * `io.narrow.req`, `w` bits per accepted piece. A write command is followed by `dbeats` data
  * beats, each fetched from `io.wide.req_data` and serialized in turn.
  *
  * Incoming: every valid piece on `io.narrow.resp` is shifted into the top of `in_buf`; once the
  * last piece of a response has arrived `io.wide.resp.valid` is raised for one cycle.
  * `io.wide.resp.ready` is not consulted.
  *
  * @param w width in bits of the narrow link
  */
class MemSerdes(w: Int) extends Module
{
  val io = new Bundle {
    val wide = new MemIO().flip
    val narrow = new MemSerializedIO(w)
  }

  // Flattened widths of the command, data and response payloads.
  val abits = io.wide.req_cmd.bits.toBits.getWidth
  val dbits = io.wide.req_data.bits.toBits.getWidth
  val rbits = io.wide.resp.bits.getWidth
  val dbeats = params(MIFDataBeats)

  val out_buf = Reg(Bits())
  val in_buf = Reg(Bits())

  val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5)
  val state = Reg(init=s_idle)
  val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w)))
  val data_send_cnt = Reg(init=UInt(0, log2Up(dbeats)))
  // Asserted while the final piece of a command / data beat is being taken by the link.
  val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w)
  val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w)

  // One piece leaves per link handshake: advance the piece counter and expose the next w bits.
  when (io.narrow.req.fire()) {
    send_cnt := send_cnt + UInt(1)
    out_buf := out_buf >> UInt(w)
  }
  // Loading a new payload takes priority over the shift above (later assignment wins).
  when (io.wide.req_cmd.fire()) {
    out_buf := io.wide.req_cmd.bits.toBits
  }
  when (io.wide.req_data.fire()) {
    out_buf := io.wide.req_data.bits.toBits
  }

  io.wide.req_cmd.ready := state === s_idle
  io.wide.req_data.ready := state === s_write_idle
  io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data
  io.narrow.req.bits := out_buf

  // Transmit state machine. Placed after the counter update above so that the `send_cnt` resets
  // here override the increment on the cycle the final piece is sent.
  switch (state) {
    is (s_idle) {
      when (io.wide.req_cmd.valid) {
        state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr)
      }
    }
    is (s_read_addr) {
      when (adone) {
        state := s_idle
        send_cnt := UInt(0)
      }
    }
    is (s_write_addr) {
      when (adone) {
        state := s_write_idle
        send_cnt := UInt(0)
      }
    }
    is (s_write_idle) {
      when (io.wide.req_data.valid) {
        state := s_write_data
      }
    }
    is (s_write_data) {
      when (ddone) {
        data_send_cnt := data_send_cnt + UInt(1)
        // After the last data beat return to idle, otherwise wait for the next beat.
        state := Mux(data_send_cnt === UInt(dbeats-1), s_idle, s_write_idle)
        send_cnt := UInt(0)
      }
    }
  }

  // Receive side.
  val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
  val data_recv_cnt = Reg(init=UInt(0, log2Up(dbeats)))
  val resp_val = Reg(init=Bool(false))

  resp_val := Bool(false) // default: pulse lasts a single cycle
  when (io.narrow.resp.valid) {
    recv_cnt := recv_cnt + UInt(1)
    when (recv_cnt === UInt((rbits-1)/w)) {
      recv_cnt := UInt(0)
      data_recv_cnt := data_recv_cnt + UInt(1)
      resp_val := Bool(true)
    }
    // New piece enters at the top; older pieces move down by w bits.
    in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w))
  }

  io.wide.resp.valid := resp_val
  io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf)
}
/** Ports of [[MemDesser]]: the serialized link (flipped, i.e. seen from the receiving end) and
  * the reconstructed wide memory port.
  *
  * @param w width in bits of the narrow link
  */
class MemDesserIO(w: Int) extends Bundle {
  val narrow = new MemSerializedIO(w).flip
  val wide = new MemIO
}
/** Counterpart of the serializer: rebuilds wide MemIO requests from `w`-bit pieces arriving on
  * `io.narrow.req`, and slices wide responses back into `w`-bit pieces on `io.narrow.resp`.
  *
  * Requires `dbits >= abits` and `rbits >= dbits`, since a single `in_buf` sized from `rbits`
  * is shared by commands and data.
  *
  * @param w width in bits of the narrow link
  */
class MemDesser(w: Int) extends Module // test rig side
{
  val io = new MemDesserIO(w)

  // Flattened widths of the command, data and response payloads.
  val abits = io.wide.req_cmd.bits.toBits.getWidth
  val dbits = io.wide.req_data.bits.toBits.getWidth
  val rbits = io.wide.resp.bits.getWidth
  val dbeats = params(MIFDataBeats)
  require(dbits >= abits && rbits >= dbits)

  val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
  val data_recv_cnt = Reg(init=UInt(0, log2Up(dbeats)))
  // Asserted on the piece that completes a command / data beat / response beat.
  val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w)
  val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w)
  val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w)

  val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5)
  val state = Reg(init=s_cmd_recv)

  val in_buf = Reg(Bits())
  // `recv_cnt` counts pieces in both directions: received request pieces and sent response pieces.
  when (io.narrow.req.fire() || io.narrow.resp.valid) {
    recv_cnt := recv_cnt + UInt(1)
    in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w))
  }
  io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv

  // Request-side state machine. Placed after the counter update above so that the `recv_cnt`
  // resets here take priority over the increment.
  switch (state) {
    is (s_cmd_recv) {
      when (adone) {
        state := s_cmd
        recv_cnt := UInt(0)
      }
    }
    is (s_cmd) {
      when (io.wide.req_cmd.ready) {
        // Writes go on to collect data beats; reads wait for the reply to be streamed back.
        state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply)
      }
    }
    is (s_data_recv) {
      when (ddone) {
        state := s_data
        recv_cnt := UInt(0)
      }
    }
    is (s_data) {
      when (io.wide.req_data.ready) {
        state := Mux(data_recv_cnt === UInt(dbeats-1), s_cmd_recv, s_data_recv)
        data_recv_cnt := data_recv_cnt + UInt(1)
      }
    }
  }

  // Kept last so it has priority over the transitions above.
  when (rdone) { // state === s_reply
    when (data_recv_cnt === UInt(dbeats-1)) {
      state := s_cmd_recv
    }
    recv_cnt := UInt(0)
    data_recv_cnt := data_recv_cnt + UInt(1)
  }

  // Pieces are shifted in from the top of `in_buf`, so a payload narrower than the buffer ends up
  // in the upper pieces; shift right by the unused piece count to align it to bit 0.
  val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w)
  io.wide.req_cmd.valid := state === s_cmd
  io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd)

  io.wide.req_data.valid := state === s_data
  io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w)

  // Responses are queued, then sent one piece per cycle; the entry is dequeued on its last piece.
  val dataq = Module(new Queue(new MemResp, dbeats))
  dataq.io.enq <> io.wide.resp
  dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w)

  io.narrow.resp.valid := dataq.io.deq.valid
  io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UInt(w))
}
// (Truncated leftover comment:) Adapter between a TileLinkIO and a UncachedTileLinkIO, merges voluntary ...
// Adapter between a TileLinkIO (the type of `io.tl` below) and a memory-controller MemIO.
/** Converts between a TileLinkIO port (`io.tl`) and a MemIO port (`io.mem`).
  *
  * Outgoing: an Acquire or Release accepted on `io.tl` becomes one MemIO command, plus write-data
  * beats when the message carries data. Release has priority over Acquire. Every Release and
  * every data-carrying Acquire is acknowledged with a locally generated Grant once all of its
  * TileLink beats have been received.
  *
  * Incoming: MemIO responses become Grants. For Acquires the MemIO tag is packed as
  * `Cat(client_xact_id, a_type, is_builtin_type)` and unpacked again to build the Grant;
  * for Releases the tag is just `client_xact_id`.
  *
  * If the TileLink and MemIO beat shapes differ (data width or beat count), data is staged in a
  * buffer and re-sliced in each direction; otherwise beats are forwarded without buffering.
  *
  * @param qDepth depth of the MemIO command and data queues used in the buffered case
  */
class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
  val io = new Bundle {
    val tl = new TileLinkIO().flip
    val mem = new MemIO
  }
  val mifTagBits = params(MIFTagBits)
  val mifDataBits = params(MIFDataBits)
  val mifDataBeats = params(MIFDataBeats)
  val tlDataBits = params(TLDataBits)
  val tlDataBeats = params(TLDataBeats)
  val dataBits = tlDataBits*tlDataBeats
  // Both sides must move the same number of bits per block, and the tag must hold the client id.
  require(tlDataBits*tlDataBeats == mifDataBits*mifDataBeats)
  require(params(TLClientXactIdBits) <= params(MIFTagBits))

  // Default handshake values; selectively overridden by the logic below.
  io.tl.acquire.ready := Bool(false)
  io.tl.probe.valid := Bool(false)
  io.tl.release.ready := Bool(false)
  io.tl.finish.ready := Bool(true)

  io.mem.resp.ready := Bool(false)

  // Input 0: Grants built from memory responses. Input 1: locally generated acknowledgements.
  val gnt_arb = Module(new Arbiter(new LogicalNetworkIO(new Grant), 2))
  io.tl.grant <> gnt_arb.io.out

  val acq_has_data = io.tl.acquire.bits.payload.hasData()
  val rel_has_data = io.tl.release.bits.payload.hasData()

  // Decompose outgoing TL Acquires into MemIO cmd and data
  val active_out = Reg(init=Bool(false))     // a transaction is in flight
  val cmd_sent_out = Reg(init=Bool(false))   // its MemIO command has been handed off
  val tag_out = Reg(Bits())
  val addr_out = Reg(Bits())
  val has_data = Reg(init=Bool(false))
  val data_from_rel = Reg(init=Bool(false))  // in-flight transaction came from a Release
  val (tl_cnt_out, tl_wrap_out) =
    Counter((io.tl.acquire.fire() && acq_has_data) ||
              (io.tl.release.fire() && rel_has_data), tlDataBeats)
  val tl_done_out = Reg(init=Bool(false))    // all TileLink data beats have been received
  val make_grant_ack = Reg(init=Bool(false)) // an acknowledging Grant is still owed
  val grant_for_rel = Grant(
                        is_builtin_type = Bool(true),
                        g_type = Grant.voluntaryAckType,
                        client_xact_id = tag_out,
                        manager_xact_id = UInt(0))
  val grant_for_acq_write = ManagerMetadata.onReset.makeGrant(
                              acq = Acquire(
                                is_builtin_type = tag_out(0),
                                a_type = tag_out >> UInt(1),
                                client_xact_id = tag_out >> UInt(io.tl.tlAcquireTypeBits+1),
                                addr_block = UInt(0)), //DNC
                              manager_xact_id = UInt(0))
  gnt_arb.io.in(1).valid := Bool(false)
  gnt_arb.io.in(1).bits.payload := Mux(data_from_rel, grant_for_rel, grant_for_acq_write)

  if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) {
    val mem_cmd_q = Module(new Queue(new MemReqCmd, qDepth))
    val mem_data_q = Module(new Queue(new MemData, qDepth))
    mem_cmd_q.io.enq.valid := Bool(false)
    mem_data_q.io.enq.valid := Bool(false)
    val (mif_cnt_out, mif_wrap_out) = Counter(mem_data_q.io.enq.fire(), mifDataBeats)
    val mif_done_out = Reg(init=Bool(false))
    // TileLink beats are collected in registers and viewed as MemIO-sized beats.
    val tl_buf_out = Vec.fill(tlDataBeats){ Reg(io.tl.acquire.bits.payload.data.clone) }
    val mif_buf_out = Vec.fill(mifDataBeats){ new MemData }
    mif_buf_out := mif_buf_out.fromBits(tl_buf_out.toBits)
    // Bits needed to emit the next MemIO beat vs. bits received so far from TileLink.
    val mif_prog_out = (mif_cnt_out+UInt(1, width = log2Up(mifDataBeats+1)))*UInt(mifDataBits)
    val tl_prog_out = tl_cnt_out*UInt(tlDataBits)

    when(!active_out){
      io.tl.release.ready := Bool(true)
      io.tl.acquire.ready := !io.tl.release.valid
      when(io.tl.release.valid) {
        active_out := Bool(true)
        cmd_sent_out := Bool(false)
        tag_out := io.tl.release.bits.payload.client_xact_id
        addr_out := io.tl.release.bits.payload.addr_block
        has_data := rel_has_data
        data_from_rel := Bool(true)
        make_grant_ack := Bool(true)
        tl_done_out := tl_wrap_out
        // Fix: re-arm the MemIO beat tracker; previously it stayed set after the first write,
        // so later writes never enqueued any data beats.
        mif_done_out := Bool(false)
        tl_buf_out(tl_cnt_out) := io.tl.release.bits.payload.data
      } .elsewhen(io.tl.acquire.valid) {
        active_out := Bool(true)
        cmd_sent_out := Bool(false)
        tag_out := Cat(io.tl.acquire.bits.payload.client_xact_id,
                       io.tl.acquire.bits.payload.a_type,
                       io.tl.acquire.bits.payload.is_builtin_type)
        addr_out := io.tl.acquire.bits.payload.addr_block
        has_data := acq_has_data
        data_from_rel := Bool(false)
        make_grant_ack := acq_has_data
        tl_done_out := tl_wrap_out
        // Fix: see the Release branch above.
        mif_done_out := Bool(false)
        tl_buf_out(tl_cnt_out) := io.tl.acquire.bits.payload.data
      }
    }
    when(active_out) {
      mem_cmd_q.io.enq.valid := !cmd_sent_out
      cmd_sent_out := cmd_sent_out || mem_cmd_q.io.enq.fire()
      when(has_data) {
        when(!tl_done_out) {
          // Fix: take the remaining beats from the channel that started the transaction.
          // Previously only the Acquire channel was readied and sampled here, so the later
          // beats of a multi-beat Release were never accepted.
          when(data_from_rel) {
            io.tl.release.ready := Bool(true)
            when(io.tl.release.valid) {
              tl_buf_out(tl_cnt_out) := io.tl.release.bits.payload.data
            }
          } .otherwise {
            io.tl.acquire.ready := Bool(true)
            when(io.tl.acquire.valid) {
              tl_buf_out(tl_cnt_out) := io.tl.acquire.bits.payload.data
            }
          }
        }
        when(!mif_done_out) {
          // Emit a MemIO beat once enough TileLink bits have been buffered to cover it.
          mem_data_q.io.enq.valid := tl_done_out || mif_prog_out <= tl_prog_out
        }
      }
      when(tl_wrap_out) { tl_done_out := Bool(true) }
      when(mif_wrap_out) { mif_done_out := Bool(true) }
      when(tl_done_out && make_grant_ack) {
        gnt_arb.io.in(1).valid := Bool(true)
        when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) }
      }
      when(cmd_sent_out && (!has_data || mif_done_out) && !make_grant_ack) {
        active_out := Bool(false)
      }
    }

    mem_cmd_q.io.enq.bits.rw := has_data
    mem_cmd_q.io.enq.bits.tag := tag_out
    mem_cmd_q.io.enq.bits.addr := addr_out
    mem_data_q.io.enq.bits.data := mif_buf_out(mif_cnt_out).data
    io.mem.req_cmd <> mem_cmd_q.io.deq
    io.mem.req_data <> mem_data_q.io.deq
  } else { // Don't make the data buffers and try to flow cmd and data
    io.mem.req_cmd.valid := Bool(false)
    io.mem.req_data.valid := Bool(false)
    io.mem.req_cmd.bits.rw := has_data
    io.mem.req_cmd.bits.tag := tag_out
    io.mem.req_cmd.bits.addr := addr_out
    io.mem.req_data.bits.data := Mux(data_from_rel,
                                   io.tl.release.bits.payload.data,
                                   io.tl.acquire.bits.payload.data)
    when(!active_out){
      io.tl.release.ready := io.mem.req_data.ready
      io.tl.acquire.ready := io.mem.req_data.ready && !io.tl.release.valid
      io.mem.req_data.valid := (io.tl.release.valid && rel_has_data) ||
                                 (io.tl.acquire.valid && acq_has_data)
      when(io.mem.req_data.ready && (io.tl.release.valid || io.tl.acquire.valid)) {
        // Stay active if the command was not taken this cycle or a data beat is being sent.
        active_out := !io.mem.req_cmd.ready || io.mem.req_data.valid
        io.mem.req_cmd.valid := Bool(true)
        cmd_sent_out := io.mem.req_cmd.ready
        // Capture the command fields driven in the branches below so that the command can be
        // re-presented on later cycles if it is not accepted now.
        tag_out := io.mem.req_cmd.bits.tag
        // Fix: capture the command address; previously the data word was stored here, which
        // corrupted the address of any command that had to be retried.
        addr_out := io.mem.req_cmd.bits.addr
        has_data := io.mem.req_cmd.bits.rw
        tl_done_out := tl_wrap_out
        when(io.tl.release.valid) {
          data_from_rel := Bool(true)
          make_grant_ack := Bool(true)
          io.mem.req_cmd.bits.rw := rel_has_data
          io.mem.req_cmd.bits.tag := io.tl.release.bits.payload.client_xact_id
          io.mem.req_cmd.bits.addr := io.tl.release.bits.payload.addr_block
          io.mem.req_data.bits.data := io.tl.release.bits.payload.data
        } .elsewhen(io.tl.acquire.valid) {
          data_from_rel := Bool(false)
          make_grant_ack := acq_has_data
          io.mem.req_cmd.bits.rw := acq_has_data
          io.mem.req_cmd.bits.tag := Cat(io.tl.acquire.bits.payload.client_xact_id,
                                         io.tl.acquire.bits.payload.a_type,
                                         io.tl.acquire.bits.payload.is_builtin_type)
          io.mem.req_cmd.bits.addr := io.tl.acquire.bits.payload.addr_block
          io.mem.req_data.bits.data := io.tl.acquire.bits.payload.data
        }
      }
    }
    when(active_out) {
      io.mem.req_cmd.valid := !cmd_sent_out
      cmd_sent_out := cmd_sent_out || io.mem.req_cmd.fire()
      when(has_data && !tl_done_out) {
        // Forward the remaining beats straight from the originating channel.
        when(data_from_rel) {
          io.tl.release.ready := io.mem.req_data.ready
          io.mem.req_data.valid := io.tl.release.valid
        } .otherwise {
          io.tl.acquire.ready := io.mem.req_data.ready
          io.mem.req_data.valid := io.tl.acquire.valid
        }
      }
      when(tl_wrap_out) { tl_done_out := Bool(true) }
      when(tl_done_out && make_grant_ack) {
        gnt_arb.io.in(1).valid := Bool(true)
        when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) }
      }
      when(cmd_sent_out && (!has_data || tl_done_out) && !make_grant_ack) {
        active_out := Bool(false)
      }
    }
  }

  // Aggregate incoming MemIO responses into TL Grants
  val active_in = Reg(init=Bool(false))
  val (tl_cnt_in, tl_wrap_in) = Counter(io.tl.grant.fire() && io.tl.grant.bits.payload.hasMultibeatData(), tlDataBeats)
  val tag_in = Reg(UInt(width = mifTagBits))

  if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) {
    val (mif_cnt_in, mif_wrap_in) = Counter(io.mem.resp.fire(), mifDataBeats) // TODO: Assumes all resps have data
    val mif_done_in = Reg(init=Bool(false))
    // MemIO beats are collected in registers and viewed as TileLink-sized beats.
    val mif_buf_in = Vec.fill(mifDataBeats){ Reg(new MemData) }
    val tl_buf_in = Vec.fill(tlDataBeats){ io.tl.acquire.bits.payload.data.clone }
    tl_buf_in := tl_buf_in.fromBits(mif_buf_in.toBits)
    // Bits needed to emit the next Grant beat vs. bits received so far from memory.
    val tl_prog_in = (tl_cnt_in+UInt(1, width = log2Up(tlDataBeats+1)))*UInt(tlDataBits)
    val mif_prog_in = mif_cnt_in*UInt(mifDataBits)
    gnt_arb.io.in(0).bits.payload := ManagerMetadata.onReset.makeGrant(
                                        acq = Acquire(
                                          is_builtin_type = tag_in(0),
                                          a_type = tag_in >> UInt(1),
                                          client_xact_id = tag_in >> UInt(io.tl.tlAcquireTypeBits+1),
                                          addr_block = UInt(0)), //DNC
                                        manager_xact_id = UInt(0),
                                        addr_beat = tl_cnt_in,
                                        data = tl_buf_in(tl_cnt_in))

    // Fix: explicit default so no Grant is offered while no response is being aggregated.
    gnt_arb.io.in(0).valid := Bool(false)
    when(!active_in) {
      io.mem.resp.ready := Bool(true)
      when(io.mem.resp.valid) {
        active_in := Bool(true)
        mif_done_in := mif_wrap_in
        tag_in := io.mem.resp.bits.tag
        // Fix: index the MemIO staging buffer with the MemIO beat counter (was tl_cnt_in).
        mif_buf_in(mif_cnt_in).data := io.mem.resp.bits.data
      }
    }
    when(active_in) {
      // Offer a Grant beat once enough memory bits have been buffered to cover it.
      gnt_arb.io.in(0).valid := mif_done_in || tl_prog_in <= mif_prog_in
      when(!mif_done_in) {
        io.mem.resp.ready := Bool(true)
        when(io.mem.resp.valid) {
          mif_buf_in(mif_cnt_in).data := io.mem.resp.bits.data
        }
      }
      when(mif_wrap_in) { mif_done_in := Bool(true) }
      when(tl_wrap_in) { active_in := Bool(false) }
    }
  } else { // Don't generate all the uneeded data buffers and flow resp
    gnt_arb.io.in(0).valid := io.mem.resp.valid
    io.mem.resp.ready := gnt_arb.io.in(0).ready
    gnt_arb.io.in(0).bits.payload :=
      ManagerMetadata.onReset.makeGrant(
        acq = Acquire(
          is_builtin_type = io.mem.resp.bits.tag(0),
          a_type = io.mem.resp.bits.tag >> UInt(1),
          client_xact_id = io.mem.resp.bits.tag >> UInt(io.tl.tlAcquireTypeBits+1),
          addr_block = UInt(0)), //DNC
        manager_xact_id = UInt(0),
        addr_beat = tl_cnt_in,
        data = io.mem.resp.bits.data)
  }
}
/** Queue backed by a sequential-read memory, with a flow-through path when empty.
  *
  * While the queue is empty, `io.enq` is presented directly on `io.deq`; if the consumer is ready
  * in that cycle the element bypasses the memory altogether (`do_flow`). Otherwise elements are
  * written to `ram` and read back through a registered address, with `ram_out_valid` flagging
  * when the read data is meaningful.
  *
  * @param entries queue capacity; must be a power of two greater than 1
  * @param data    generator for the element type
  */
class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module
{
  val io = new QueueIO(data, entries)
  require(isPow2(entries) && entries > 1)

  // A flow-through transfer touches neither pointer, so it is excluded from enq/deq accounting.
  val do_flow = Bool()
  val do_enq = io.enq.fire() && !do_flow
  val do_deq = io.deq.fire() && !do_flow

  val maybe_full = Reg(init=Bool(false))
  val (enq_ptr, _) = Counter(do_enq, entries)
  val (deq_ptr, _) = Counter(do_deq, entries)
  // Equal pointers mean either empty or full; the last unmatched operation tells which.
  when (do_enq != do_deq) { maybe_full := do_enq }

  val ptr_match = enq_ptr === deq_ptr
  val empty = ptr_match && !maybe_full
  val full = ptr_match && maybe_full
  val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2)
  do_flow := empty && io.deq.ready

  val ram = Mem(data, entries, seqRead = true)
  val ram_addr = Reg(Bits())
  val ram_out_valid = Reg(Bool())

  ram_out_valid := Bool(false)
  when (do_enq) { ram(enq_ptr) := io.enq.bits }
  // Start a read for the next cycle: either the entry after the one being dequeued now, or the
  // head entry itself if nothing is currently being presented.
  when (io.deq.ready && (atLeastTwo || (!io.deq.valid && !empty))) {
    ram_out_valid := Bool(true)
    ram_addr := Mux(io.deq.valid, deq_ptr + UInt(1), deq_ptr)
  }

  io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid)
  io.enq.ready := !full
  io.deq.bits := Mux(empty, io.enq.bits, ram(ram_addr))
}
/** A [[HellaFlowQueue]] followed by a one-entry pipelined `Queue` on its output.
  *
  * @param entries capacity passed to the underlying HellaFlowQueue
  * @param data    generator for the element type
  */
class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module
{
  val io = new QueueIO(data, entries)

  val fq = Module(new HellaFlowQueue(entries)(data))
  io.enq <> fq.io.enq
  // Output stage: a single-entry queue with pipe = true between the flow queue and io.deq.
  io.deq <> Queue(fq.io.deq, 1, pipe = true)
}
/** Factory that instantiates a HellaQueue behind an existing decoupled producer. */
object HellaQueue
{
  /** Builds a HellaQueue fed by `enq` and returns its dequeue port.
    *
    * @param enq     producer interface to attach to the queue input
    * @param entries capacity of the queue
    * @return the queue's `io.deq`
    */
  def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = {
    val q = Module(new HellaQueue(entries)(enq.bits.clone))
    // not using <> so that override is allowed
    q.io.enq.valid := enq.valid
    q.io.enq.bits := enq.bits
    enq.ready := q.io.enq.ready
    q.io.deq
  }
}
/** Adapts a MemIO requester (`io.cpu`) to a MemPipeIO memory (`io.mem`).
  *
  * Responses from `io.mem` are valid-only, so they are collected in a response queue of
  * `numRequests * refillCycles` entries. A credit counter starts at that capacity, drops by
  * `refillCycles` for each read command issued and rises by one for each response handed to the
  * cpu. Read commands are held back while fewer than `refillCycles` credits remain; write
  * commands are never held back by the counter.
  *
  * @param numRequests  multiplied by `refillCycles` to size the response queue
  * @param refillCycles credits consumed by one read command
  */
class MemPipeIOMemIOConverter(numRequests: Int, refillCycles: Int) extends Module {
  val io = new Bundle {
    val cpu = new MemIO().flip
    val mem = new MemPipeIO
  }

  val numEntries = numRequests * refillCycles
  val size = log2Down(numEntries) + 1

  // Credit bookkeeping; `inc` and `dec` are driven at the bottom of the module.
  val inc = Bool()
  val dec = Bool()
  val count = Reg(init=UInt(numEntries, size))
  val watermark = count >= UInt(refillCycles)

  when (inc) {
    when (dec) {
      // Simultaneous return and issue: net change is -(refillCycles - 1).
      count := count - UInt(refillCycles-1)
    } .otherwise {
      count := count + UInt(1)
    }
  } .elsewhen (dec) {
    count := count - UInt(refillCycles)
  }

  // Writes always pass; reads only while enough credits are available.
  val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark

  io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask
  io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask
  io.mem.req_cmd.bits := io.cpu.req_cmd.bits

  io.mem.req_data <> io.cpu.req_data

  val resp_dataq = Module((new HellaQueue(numEntries)) { new MemResp })
  resp_dataq.io.enq <> io.mem.resp
  io.cpu.resp <> resp_dataq.io.deq

  inc := resp_dataq.io.deq.fire()
  dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw
}
/** Connects a TileLinkIO port to a MemPipeIO port by chaining two converters:
  * a MemIOTileLinkIOConverter (TileLink to MemIO, queue depth 1) followed by a
  * MemPipeIOMemIOConverter (MemIO to MemPipeIO). Commands pass through a 2-entry pipelined queue
  * and write data through a `refillCycles`-entry pipelined queue between the two stages.
  *
  * @param outstanding  forwarded to MemPipeIOMemIOConverter as its request count
  * @param refillCycles forwarded to MemPipeIOMemIOConverter; also the depth of the data queue
  */
class MemPipeIOTileLinkIOConverter(outstanding: Int, refillCycles: Int) extends Module {
  val io = new Bundle {
    val tl = new TileLinkIO().flip
    val mem = new MemPipeIO
  }

  val a = Module(new MemIOTileLinkIOConverter(1))
  val b = Module(new MemPipeIOMemIOConverter(outstanding, refillCycles))

  // TileLink side.
  a.io.tl <> io.tl

  // MemIO link between the two stages: queued requests, directly connected responses.
  b.io.cpu.req_cmd <> Queue(a.io.mem.req_cmd, 2, pipe=true)
  b.io.cpu.req_data <> Queue(a.io.mem.req_data, refillCycles, pipe=true)
  a.io.mem.resp <> b.io.cpu.resp

  // MemPipeIO side.
  b.io.mem <> io.mem
}