1
0

reorganize moving non-submodule packages into src/main/scala

This commit is contained in:
Howard Mao
2016-08-19 10:58:56 -07:00
parent f78da0b0ea
commit 7b20609d4d
110 changed files with 3 additions and 381 deletions

View File

@ -0,0 +1,424 @@
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.util._
import uncore.constants._
import cde.{Parameters, Field}
import HastiConstants._
/* We need to translate TileLink requests into operations we can actually execute on AHB.
* The general plan of attack is:
* get => one AHB=>TL read
* put => [multiple AHB write fragments=>nil], one AHB write=>TL
* getBlock => AHB burst reads =>TL
* putBlock => AHB burst writes=>TL
* getPrefetch => noop=>TL
* putPrefetch => noop=>TL
* putAtomic => one AHB=>TL read, one idle, one AHB atom_write=>nil, one idle
*
* This requires that we support a pipeline of optional AHB requests with optional TL responses
*/
// Request bundle passed from AHBTileLinkIn (stage 1) to AHBBusMaster (stage 2).
// It carries the HastiMasterIO signals, the TileLink grant metadata mixed in via
// HasGrantType / HasClientTransactionId / HasTileLinkBeatId, and the control
// flags below that steer the stage-2 pipeline.
class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO
with HasGrantType
with HasClientTransactionId
with HasTileLinkBeatId {
// When set, stage 2 issues a NONSEQ/SEQ transfer for this request; otherwise it drives IDLE/BUSY
val executeAHB = Bool()
// When set, stage 2 raises grant.valid once this request reaches its data phase
val respondTL = Bool()
// When set, stage 2 latches hrdata in the data phase as the AMOALU left-hand operand
val latchAtom = Bool()
// Selects HTRANS_NONSEQ (first beat) versus HTRANS_SEQ for an executed request
val firstBurst = Bool()
// Cleared on all but the last beat of a burst; stage 2 uses it to track midBurst
val finalBurst = Bool()
val cmd = Bits(width = M_SZ) // atomic op, forwarded to the AMOALU cmd input
}
// AHB stage1: translate TileLink Acquires into AHBRequests
/** AHB stage 1: turns each TileLink Acquire into one or more AHBRequests.
  *
  * A Put is split into aligned write fragments (one request per fragment), a
  * getBlock is expanded into one request per burst beat, putBlock beats are
  * forwarded as they arrive, and a putAtomic walks a four-state
  * read / idle / write / idle sequence.
  *
  * @param supportAtomics enables putAtomic handling; when false a valid
  *                       putAtomic Acquire trips an assertion
  */
class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
    with HasHastiParameters
    with HasTileLinkParameters
    with HasAddrMapParameters {
  val io = new Bundle {
    val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe
    val request = new DecoupledIO(new AHBRequestIO)
  }

  // Match the AHB burst with a TileLink {Put,Get}Block
  val burstSize = tlDataBeats match {
    case 1 => HBURST_SINGLE
    // case 2 not supported by AHB
    case 4 => HBURST_WRAP4
    case 8 => HBURST_WRAP8
    case 16 => HBURST_WRAP16
    case _ => throw new java.lang.AssertionError("TileLink beats unsupported by AHB")
  }

  // Bursts start at 0 and wrap-around back to 0
  val finalBurst = UInt(tlDataBeats-1, width = log2Up(tlDataBeats)).asUInt
  val firstBurst = UInt(0, width = log2Up(tlDataBeats))
  val next_wmask = Wire(UInt(width = tlDataBytes)) // calculated below

  // State variables for processing more complicated TileLink Acquires
  val s_atom_r :: s_atom_idle1 :: s_atom_w :: s_atom_idle2 :: Nil = Enum(UInt(), 4)
  val atom_state = Reg(init = s_atom_r) // never changes if !supportAtomics
  val done_wmask = Reg(init = UInt(0, width = tlDataBytes)) // bytes of the current Put already issued
  val burst = Reg(init = firstBurst)                        // beat counter within a block operation

  // Grab some view of the TileLink acquire
  val acq_wmask = io.acquire.bits.wmask()
  val isReadBurst = io.acquire.bits.is(Acquire.getBlockType)
  val isWriteBurst = io.acquire.bits.is(Acquire.putBlockType)
  val isBurst = isWriteBurst || isReadBurst
  val isAtomic = io.acquire.bits.is(Acquire.putAtomicType) && Bool(supportAtomics)
  val isPut = io.acquire.bits.is(Acquire.putType)

  // Final states?
  val last_wmask = next_wmask === acq_wmask
  val last_atom = atom_state === s_atom_idle2
  val last_burst = burst === finalBurst

  // Block the incoming request until we've fully consumed it
  // NOTE: the outgoing grant.valid may happen while acquire.ready is still false;
  // for this reason it is essential to have a Queue or a Pipe in front of acquire
  io.acquire.ready := io.request.ready && MuxLookup(io.acquire.bits.a_type, Bool(true), Array(
    Acquire.getType -> Bool(true),
    Acquire.getBlockType -> last_burst, // hold it until the last beat is burst
    Acquire.putType -> last_wmask, // only accept the put if we can fully consume its wmask
    Acquire.putBlockType -> Bool(true),
    Acquire.putAtomicType -> last_atom, // atomic operation stages complete
    Acquire.getPrefetchType -> Bool(true),
    Acquire.putPrefetchType -> Bool(true)))

  // Advance the fragment state
  when (io.request.ready && io.acquire.valid && isPut) {
    when (last_wmask) { // if this was the last fragment, restart FSM
      done_wmask := UInt(0)
    } .otherwise {
      done_wmask := next_wmask
    }
  }

  // Advance the burst state
  // We assume here that TileLink gives us all putBlock beats with nothing between them
  when (io.request.ready && io.acquire.valid && isBurst) {
    when (last_burst) {
      burst := UInt(0)
    } .otherwise {
      burst := burst + UInt(1)
    }
  }

  // Advance the atomic state machine
  when (io.request.ready && io.acquire.valid && isAtomic) {
    switch (atom_state) {
      is (s_atom_r) { atom_state := s_atom_idle1 }
      is (s_atom_idle1) { atom_state := s_atom_w } // idle1 => AMOALU runs on a different clock than AHB slave read
      is (s_atom_w) { atom_state := s_atom_idle2 }
      is (s_atom_idle2) { atom_state := s_atom_r } // idle2 state is required by AHB after hmastlock is lowered
    }
  }

  // Recursively splits `range` in halves.
  // Returns (range=0, range=-1, aligned_wmask, size)
  def mask_helper(in_0 : Bool, range : UInt): (Bool, Bool, UInt, UInt) = {
    val len = range.getWidth
    if (len == 1) {
      (range === UInt(0), range === UInt(1), in_0.asUInt() & range, UInt(0))
    } else {
      val mid = len / 2
      val lo = range(mid-1, 0)
      val hi = range(len-1, mid)
      val (lo_0, lo_1, lo_m, lo_s) = mask_helper(in_0, lo)
      val (hi_0, hi_1, hi_m, hi_s) = mask_helper(in_0 && lo_0, hi)
      val out_0 = lo_0 && hi_0
      val out_1 = lo_1 && hi_1
      val out_m = Cat(hi_m, lo_m) | Fill(len, (in_0 && out_1).asUInt())
      val out_s = Mux(out_1, UInt(log2Up(len)), Mux(lo_0, hi_s, lo_s))
      (out_0, out_1, out_m, out_s)
    }
  }

  // Pick the next Put fragment out of the bytes that have not been issued yet
  val pending_wmask = acq_wmask & ~done_wmask
  val put_addr = PriorityEncoder(pending_wmask)
  val (wmask_0, _, exec_wmask, put_size) = mask_helper(Bool(true), pending_wmask)
  next_wmask := done_wmask | exec_wmask

  // Calculate the address, with consideration to put fragments and bursts
  val addr_block = io.acquire.bits.addr_block
  val addr_beatin= io.acquire.bits.addr_beat
  val addr_burst = Mux(isReadBurst, addr_beatin + burst, addr_beatin)
  val addr_byte = Mux(isPut, put_addr, io.acquire.bits.addr_byte())
  val addr_beat = Mux(isWriteBurst, UInt(0), addr_burst)
  val ahbAddr = Cat(addr_block, addr_burst, addr_byte)
  val ahbSize = Mux(isPut, put_size, Mux(isBurst, UInt(log2Ceil(tlDataBytes)), io.acquire.bits.op_size()))

  val ahbBurst = MuxLookup(io.acquire.bits.a_type, HBURST_SINGLE, Array(
    Acquire.getType -> HBURST_SINGLE,
    Acquire.getBlockType -> burstSize,
    Acquire.putType -> HBURST_SINGLE,
    Acquire.putBlockType -> burstSize,
    Acquire.putAtomicType -> HBURST_SINGLE,
    Acquire.getPrefetchType -> HBURST_SINGLE,
    Acquire.putPrefetchType -> HBURST_SINGLE))

  val ahbWrite = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
    Acquire.getType -> Bool(false),
    Acquire.getBlockType -> Bool(false),
    Acquire.putType -> Bool(true),
    Acquire.putBlockType -> Bool(true),
    Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
      s_atom_r -> Bool(false),
      s_atom_idle1 -> Bool(false), // don't care
      s_atom_w -> Bool(true),
      s_atom_idle2 -> Bool(true))), // don't care
    Acquire.getPrefetchType -> Bool(false), // don't care
    Acquire.putPrefetchType -> Bool(true))) // don't care

  // Does this request actually perform an AHB transfer?
  val ahbExecute = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
    Acquire.getType -> Bool(true),
    Acquire.getBlockType -> Bool(true),
    Acquire.putType -> !wmask_0, // handle the case of a Put with no bytes!
    Acquire.putBlockType -> Bool(true),
    Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
      s_atom_r -> Bool(true),
      s_atom_idle1 -> Bool(false),
      s_atom_w -> Bool(true),
      s_atom_idle2 -> Bool(false))),
    Acquire.getPrefetchType -> Bool(false),
    Acquire.putPrefetchType -> Bool(false)))

  // Does this request produce a TileLink Grant?
  val respondTL = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
    Acquire.getType -> Bool(true),
    Acquire.getBlockType -> Bool(true),
    Acquire.putType -> last_wmask,
    Acquire.putBlockType -> last_burst,
    Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
      s_atom_r -> Bool(true), // they want the old data
      s_atom_idle1 -> Bool(false),
      s_atom_w -> Bool(false),
      s_atom_idle2 -> Bool(false))),
    Acquire.getPrefetchType -> Bool(true),
    Acquire.putPrefetchType -> Bool(true)))

  io.request.valid := io.acquire.valid
  io.request.bits.htrans := HTRANS_IDLE // unused/ignored
  io.request.bits.haddr := ahbAddr
  io.request.bits.hmastlock := isAtomic && atom_state =/= s_atom_idle2
  io.request.bits.hwrite := ahbWrite
  io.request.bits.hburst := ahbBurst
  io.request.bits.hsize := ahbSize
  io.request.bits.hprot := HPROT_DATA | HPROT_PRIVILEGED
  io.request.bits.hwdata := io.acquire.bits.data
  io.request.bits.executeAHB := ahbExecute
  io.request.bits.respondTL := respondTL
  io.request.bits.latchAtom := isAtomic && atom_state === s_atom_r
  io.request.bits.firstBurst := burst === firstBurst
  io.request.bits.finalBurst := burst === finalBurst || !isBurst
  io.request.bits.cmd := io.acquire.bits.op_code()
  io.request.bits.is_builtin_type := Bool(true)
  io.request.bits.g_type := io.acquire.bits.getBuiltInGrantType()
  io.request.bits.client_xact_id := io.acquire.bits.client_xact_id
  io.request.bits.addr_beat := addr_beat

  // Remembers (beat address - burst counter) so the assertion below can check it stays constant
  val debugBurst = Reg(UInt())
  when (io.request.valid) {
    debugBurst := addr_burst - burst
  }

  // We only support built-in TileLink requests
  assert(!io.acquire.valid || io.acquire.bits.is_builtin_type, "AHB bridge only supports builtin TileLink types")
  // Ensure alignment of address to size
  assert(!io.acquire.valid || (ahbAddr & ((UInt(1) << ahbSize) - UInt(1))) === UInt(0), "TileLink operation misaligned")
  // If this is a putBlock, make sure it moves properly
  assert(!io.acquire.valid || !isBurst || burst === firstBurst || debugBurst === addr_burst - burst, "TileLink putBlock beats not sequential")
  // We better not get an incomplete TileLink acquire
  assert(!io.acquire.valid || isBurst || burst === firstBurst, "TileLink never completed a putBlock")
  // If we disabled atomic support, we better not see a request.
  // Gated by acquire.valid like the checks above: the payload bits carry no
  // meaning while valid is low and must not be able to trip the assertion.
  assert(!io.acquire.valid || !io.acquire.bits.is(Acquire.putAtomicType) || Bool(supportAtomics),
    "AHB bridge received a putAtomic but was built without atomics support")
}
// AHB stage2: execute AHBRequests
// Drives the AHB master port from the AHBRequest stream and produces TileLink Grants.
// Request metadata moves through two register sets: suffix 0 is loaded when a
// request is accepted (address phase) and suffix 1 is loaded from suffix 0 one
// step later (data phase).
// supportAtomics: when true, the write data register is loaded from the AMOALU
// output instead of the raw request data.
class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
with HasHastiParameters
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new Bundle {
val request = new DecoupledIO(new AHBRequestIO).flip
val grant = new DecoupledIO(new Grant)
val ahb = new HastiMasterIO()
}
// All AHB outputs are registered (they might be IOs)
val midBurst = Reg(init = Bool(false)) // set while a burst has started but not yet seen its final beat
val htrans = Reg(init = HTRANS_IDLE)
val haddr = Reg(UInt())
val hmastlock = Reg(init = Bool(false))
val hwrite = Reg(Bool())
val hburst = Reg(UInt())
val hsize = Reg(init = UInt(0, width = SZ_HSIZE))
val hprot = Reg(UInt())
val hwdata0 = Reg(Bits()) // write data captured with the request
val hwdata1 = Reg(Bits()) // write data presented on the bus
val hrdata = Reg(Bits()) // read data latched for an atomic operation
io.ahb.htrans := htrans
io.ahb.haddr := haddr
io.ahb.hmastlock := hmastlock
io.ahb.hwrite := hwrite
io.ahb.hburst := hburst
io.ahb.hsize := hsize
io.ahb.hprot := hprot
io.ahb.hwdata := hwdata1 // one cycle after the address phase
// TileLink response data needed in data phase
val respondTL0 = Reg(init = Bool(false))
val respondTL1 = Reg(init = Bool(false))
val latchAtom0 = Reg(init = Bool(false))
val latchAtom1 = Reg(init = Bool(false))
val executeAHB0 = Reg(init = Bool(false))
val executeAHB1 = Reg(init = Bool(false))
val bubble = Reg(init = Bool(true)) // nothing useful in address phase
val cmd = Reg(Bits())
val g_type0 = Reg(UInt())
val g_type1 = Reg(UInt())
val client_xact_id0 = Reg(Bits())
val client_xact_id1 = Reg(Bits())
val addr_beat0 = Reg(UInt())
val addr_beat1 = Reg(UInt())
val grant1 = Reg(new Grant) // NOTE(review): not referenced anywhere else in this class
// It is allowed to progress from Idle/Busy during a wait state
val addrReady = io.ahb.hready || bubble || (!executeAHB1 && !executeAHB0)
val dataReady = io.ahb.hready || !executeAHB1
// Only accept a new AHBRequest if we have enough buffer space in the pad
// to accommodate a persistent drop in TileLink's grant.ready
io.request.ready := addrReady && io.grant.ready
// htrans must be updated even if no request is valid
when (addrReady) {
when (io.request.fire() && io.request.bits.executeAHB) {
midBurst := !io.request.bits.finalBurst
when (io.request.bits.firstBurst) {
htrans := HTRANS_NONSEQ
} .otherwise {
htrans := HTRANS_SEQ
}
} .otherwise {
// No transfer to issue: BUSY keeps an unfinished burst open, IDLE otherwise
when (midBurst) {
htrans := HTRANS_BUSY
} .otherwise {
htrans := HTRANS_IDLE
}
}
}
// Address phase, clear respondTL when we have nothing to do
when (addrReady) {
when (io.request.fire()) {
respondTL0 := io.request.bits.respondTL
latchAtom0 := io.request.bits.latchAtom
executeAHB0:= io.request.bits.executeAHB
bubble := Bool(false)
} .otherwise {
respondTL0 := Bool(false)
latchAtom0 := Bool(false)
executeAHB0:= Bool(false)
bubble := Bool(true) // an atom-injected Idle is not a bubble!
}
}
// Transfer bulk address phase
when (io.request.fire()) {
haddr := io.request.bits.haddr
hmastlock := io.request.bits.hmastlock
hwrite := io.request.bits.hwrite
hburst := io.request.bits.hburst
hsize := io.request.bits.hsize
hprot := io.request.bits.hprot
hwdata0 := io.request.bits.hwdata
cmd := io.request.bits.cmd
g_type0 := io.request.bits.g_type
client_xact_id0 := io.request.bits.client_xact_id
addr_beat0 := io.request.bits.addr_beat
}
// Execute Atomic ops; unused and optimized away if !supportAtomics
// The AMOALU is built with CacheBlockOffsetBits overridden to hastiAddrBits
val amo_p = p.alterPartial({
case CacheBlockOffsetBits => hastiAddrBits
})
val alu = Module(new AMOALU(hastiDataBits, rhsIsAligned = true)(amo_p))
alu.io.addr := haddr
alu.io.cmd := cmd
alu.io.typ := hsize
alu.io.rhs := hwdata0
alu.io.lhs := hrdata
// Transfer bulk data phase
when (dataReady) {
when (addrReady) {
respondTL1 := respondTL0
latchAtom1 := latchAtom0
executeAHB1 := executeAHB0
} .otherwise {
// Address phase did not advance, so there is nothing new for the data phase
respondTL1 := Bool(false)
latchAtom1 := Bool(false)
executeAHB1 := Bool(false)
}
hwdata1 := Mux(Bool(supportAtomics), alu.io.out, hwdata0)
g_type1 := g_type0
client_xact_id1 := client_xact_id0
addr_beat1 := addr_beat0
}
// Latch the read result for an atomic operation
when (dataReady && latchAtom1) {
hrdata := io.ahb.hrdata
}
// Only issue TL grant when the slave has provided data
io.grant.valid := dataReady && respondTL1
io.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = g_type1,
client_xact_id = client_xact_id1,
manager_xact_id = UInt(0),
addr_beat = addr_beat1,
data = io.ahb.hrdata)
// We cannot support errors from AHB to TileLink
assert(!io.ahb.hresp, "AHB hresp error detected and cannot be reported via TileLink")
}
/** TileLink-to-AHB bridge: chains AHBTileLinkIn (stage 1) and AHBBusMaster
  * (stage 2) and buffers the resulting Grants in a four-entry queue.
  *
  * @param supportAtomics forwarded to both stages
  */
class AHBBridge(supportAtomics: Boolean = true)(implicit val p: Parameters)
    extends Module
    with HasHastiParameters
    with HasTileLinkParameters
    with HasAddrMapParameters {

  val io = new Bundle {
    val tl = new ClientUncachedTileLinkIO().flip
    val ahb = new HastiMasterIO()
  }

  // ---- Elaboration-time parameter checks ----
  // Hasti and TileLink widths must agree at this point in the topology
  require (tlDataBits == hastiDataBits)
  require (p(PAddrBits) == hastiAddrBits)
  // AHB does not permit bursts to cross a 1KB boundary
  require (tlDataBits * tlDataBeats <= 1024*8)
  // tlDataBytes must be a power of 2
  require (1 << log2Ceil(tlDataBytes) == tlDataBytes)

  // ---- Sub-blocks ----
  val fsm = Module(new AHBTileLinkIn(supportAtomics))
  val bus = Module(new AHBBusMaster(supportAtomics))
  val pad = Module(new Queue(new Grant, 4))

  // ---- Request path: TileLink acquire -> stage 1 -> stage 2 -> AHB ----
  fsm.io.acquire <> Queue(io.tl.acquire, 2) // Pipe is also acceptable
  bus.io.request <> fsm.io.request
  io.ahb <> bus.io.ahb

  // ---- Response path: stage 2 grant -> pad -> TileLink grant ----
  // The pad is needed to absorb AHB progress while !grant.ready
  pad.io.enq.valid := bus.io.grant.valid
  pad.io.enq.bits := bus.io.grant.bits
  // We are only 'ready' if the pad has at least 3 cycles of space
  bus.io.grant.ready := pad.io.count <= UInt(1)
  io.tl.grant <> pad.io.deq
}

View File

@ -0,0 +1,383 @@
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.constants._
import cde.Parameters
import scala.math.min
/** Maps transaction IDs from an `inIdBits`-wide space onto an `outIdBits`-wide space.
  *
  * When the outer space is at least as wide as the inner one and `forceMapping`
  * is false, IDs pass straight through. Otherwise a table of free outer IDs is
  * kept: a request is assigned the lowest free outer ID, and the response looks
  * the original inner ID back up.
  */
class IdMapper(val inIdBits: Int, val outIdBits: Int,
               val forceMapping: Boolean = false)
              (implicit val p: Parameters) extends Module {
  val io = new Bundle {
    val req = new Bundle {
      val valid = Bool(INPUT)
      val ready = Bool(OUTPUT)
      val in_id = UInt(INPUT, inIdBits)
      val out_id = UInt(OUTPUT, outIdBits)
    }
    val resp = new Bundle {
      val valid = Bool(INPUT)
      val matches = Bool(OUTPUT)
      val out_id = UInt(INPUT, outIdBits)
      val in_id = UInt(OUTPUT, inIdBits)
    }
  }
  val maxInXacts = 1 << inIdBits

  if (forceMapping || inIdBits > outIdBits) {
    // Table-based mapping
    val inSlots = 1 << inIdBits
    // No point in allowing more out xacts than in xacts
    val outSlots = min(1 << outIdBits, inSlots)

    val outFree = Reg(init = Vec.fill(outSlots){Bool(true)})
    val inFree = Reg(init = Vec.fill(inSlots){Bool(true)})
    val allocId = PriorityEncoder(outFree)
    val table = Reg(Vec(outSlots, UInt(0, inIdBits)))

    // Allocate on an accepted request
    val allocate = io.req.valid && io.req.ready
    when (allocate) {
      outFree(io.req.out_id) := Bool(false)
      inFree(io.req.in_id) := Bool(false)
      table(io.req.out_id) := io.req.in_id
    }

    // Release on a response (stated after the allocation, as in the original ordering)
    when (io.resp.valid) {
      outFree(io.resp.out_id) := Bool(true)
      inFree(io.resp.in_id) := Bool(true)
    }

    // Ready needs a free outer ID and an inner ID that is not already in flight
    io.req.ready := outFree.reduce(_ || _) && inFree(io.req.in_id)
    io.req.out_id := allocId
    io.resp.in_id := table(io.resp.out_id)
    io.resp.matches := !outFree(io.resp.out_id)
  } else {
    // Pass-through: the outer space is wide enough to carry the inner ID directly
    io.req.ready := Bool(true)
    io.req.out_id := io.req.in_id
    io.resp.matches := Bool(true)
    io.resp.in_id := io.resp.out_id
  }
}
// Per-read bookkeeping stored in the reorder queue of NastiIOTileLinkIOConverter
// so the matching Grant can be rebuilt when the NASTI read data returns.
class NastiIOTileLinkIOConverterInfo(implicit p: Parameters) extends TLBundle()(p) {
// addr_beat of the originating Acquire; returned in the Grant for sub-block reads
val addr_beat = UInt(width = tlBeatAddrBits)
// true when the originating Acquire was a sub-block type
val subblock = Bool()
}
// Converts a TileLink client port (io.tl) into a NASTI master port (io.nasti).
// Acquires without data are issued on the AR channel, Acquires with data on the
// AW and W channels; R and B responses are turned back into TileLink Grants.
class NastiIOTileLinkIOConverter(implicit p: Parameters) extends TLModule()(p)
with HasNastiParameters {
val io = new Bundle {
val tl = new ClientUncachedTileLinkIO().flip
val nasti = new NastiIO
}
val dataBits = tlDataBits*tlDataBeats
require(tlDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree") // TODO: remove this restriction
require(tlDataBeats < (1 << nastiXLenBits), "Can't have that many beats")
val has_data = io.tl.acquire.bits.hasData()
val is_subblock = io.tl.acquire.bits.isSubBlockType()
val is_multibeat = io.tl.acquire.bits.hasMultibeatData()
// Counts accepted beats of a multibeat Acquire; tl_wrap_out marks the last one
val (tl_cnt_out, tl_wrap_out) = Counter(
io.tl.acquire.fire() && is_multibeat, tlDataBeats)
val get_valid = io.tl.acquire.valid && !has_data
val put_valid = io.tl.acquire.valid && has_data
// Reorder queue saves extra information needed to send correct
// grant back to TL client
val roqIdBits = min(tlClientXactIdBits, nastiXIdBits)
val roq = Module(new ReorderQueue(
new NastiIOTileLinkIOConverterInfo, roqIdBits))
// Separate TileLink-to-NASTI ID mappers for the read path and the write path
val get_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
val put_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
val get_id_ready = get_id_mapper.io.req.ready
// A write ID is only requested for a sub-block put or for beat 0 of a block put
val put_id_mask = is_subblock || io.tl.acquire.bits.addr_beat === UInt(0)
val put_id_ready = put_id_mapper.io.req.ready || !put_id_mask
// For Get/GetBlock, make sure Reorder queue can accept new entry
val get_helper = DecoupledHelper(
get_valid,
roq.io.enq.ready,
io.nasti.ar.ready,
get_id_ready)
// w_inflight is set once the first beat of a multibeat put has been accepted
// and cleared when the beat counter wraps; w_id_reg holds the ID used for that put
val w_inflight = Reg(init = Bool(false))
val w_id_reg = Reg(init = UInt(0, nastiXIdBits))
val w_id = Mux(w_inflight, w_id_reg, put_id_mapper.io.req.out_id)
// For Put/PutBlock, make sure aw and w channel are both ready before
// we send the first beat
val aw_ready = w_inflight || io.nasti.aw.ready
val put_helper = DecoupledHelper(
put_valid,
aw_ready,
io.nasti.w.ready,
put_id_ready)
// Counts returned read beats that belong to block (non-sub-block) reads
val (nasti_cnt_out, nasti_wrap_out) = Counter(
io.nasti.r.fire() && !roq.io.deq.data.subblock, tlDataBeats)
// Enqueue read bookkeeping under the NASTI ID used on AR
roq.io.enq.valid := get_helper.fire(roq.io.enq.ready)
roq.io.enq.bits.tag := io.nasti.ar.bits.id
roq.io.enq.bits.data.addr_beat := io.tl.acquire.bits.addr_beat
roq.io.enq.bits.data.subblock := is_subblock
// Dequeue once the read has fully returned: last beat of a block, or the single sub-block beat
roq.io.deq.valid := io.nasti.r.fire() && (nasti_wrap_out || roq.io.deq.data.subblock)
roq.io.deq.tag := io.nasti.r.bits.id
get_id_mapper.io.req.valid := get_helper.fire(get_id_ready)
get_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
get_id_mapper.io.resp.out_id := io.nasti.r.bits.id
put_id_mapper.io.req.valid := put_helper.fire(put_id_ready, put_id_mask)
put_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
put_id_mapper.io.resp.valid := io.nasti.b.fire()
put_id_mapper.io.resp.out_id := io.nasti.b.bits.id
// Decompose outgoing TL Acquires into Nasti address and data channels
io.nasti.ar.valid := get_helper.fire(io.nasti.ar.ready)
io.nasti.ar.bits := NastiReadAddressChannel(
id = get_id_mapper.io.req.out_id,
addr = io.tl.acquire.bits.full_addr(),
size = Mux(is_subblock,
io.tl.acquire.bits.op_size(),
UInt(log2Ceil(tlDataBytes))),
len = Mux(is_subblock, UInt(0), UInt(tlDataBeats - 1)))
// Recursive helper over the inverted write-mask bits, pairing neighbours at each level.
// Its offset and size results feed the AW address and size of single-beat puts below.
// NOTE(review): the exact meaning of the intermediate Seq[Bool] results is not
// evident from this file alone; confirm before modifying.
def mask_helper(all_inside_0: Seq[Bool], defsize: Int): (Seq[Bool], UInt, UInt) = {
val len = all_inside_0.size
if (len == 1) {
(Seq(Bool(true)), UInt(0), UInt(defsize))
} else {
val sub_inside_0 = Seq.tabulate (len/2) { i => all_inside_0(2*i) && all_inside_0(2*i+1) }
val (sub_outside_0, sub_offset, sub_size) = mask_helper(sub_inside_0, defsize+1)
val all_outside_0 = Seq.tabulate (len) { i => sub_outside_0(i/2) && all_inside_0(i^1) }
val odd_outside_0 = Seq.tabulate (len/2) { i => all_outside_0(2*i+1) }
val odd_outside = odd_outside_0.reduce (_ || _)
val all_outside = all_outside_0.reduce (_ || _)
val offset = Cat(sub_offset, odd_outside)
val size = Mux(all_outside, UInt(defsize), sub_size)
(all_outside_0, offset, size)
}
}
val all_inside_0 = (~io.tl.acquire.bits.wmask()).toBools
val (_, put_offset, put_size) = mask_helper(all_inside_0, 0)
// AW is only issued for the first beat of a write (not while w_inflight)
io.nasti.aw.valid := put_helper.fire(aw_ready, !w_inflight)
io.nasti.aw.bits := NastiWriteAddressChannel(
id = put_id_mapper.io.req.out_id,
addr = io.tl.acquire.bits.full_addr() |
Mux(is_multibeat, UInt(0), put_offset),
size = Mux(is_multibeat, UInt(log2Ceil(tlDataBytes)), put_size),
len = Mux(is_multibeat, UInt(tlDataBeats - 1), UInt(0)))
io.nasti.w.valid := put_helper.fire(io.nasti.w.ready)
io.nasti.w.bits := NastiWriteDataChannel(
id = w_id,
data = io.tl.acquire.bits.data,
strb = Some(io.tl.acquire.bits.wmask()),
last = Mux(w_inflight,
tl_cnt_out === UInt(tlDataBeats - 1), !is_multibeat))
// The Acquire is consumed by whichever path (put or get) it belongs to
io.tl.acquire.ready := Mux(has_data,
put_helper.fire(put_valid),
get_helper.fire(get_valid))
when (!w_inflight && io.tl.acquire.fire() && is_multibeat) {
w_inflight := Bool(true)
w_id_reg := w_id
}
when (w_inflight) {
when (tl_wrap_out) { w_inflight := Bool(false) }
}
// Aggregate incoming NASTI responses into TL Grants
val (tl_cnt_in, tl_wrap_in) = Counter(
io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
// Input 0 carries read-data Grants, input 1 carries write acknowledgements
val gnt_arb = Module(new LockingArbiter(new GrantToDst, 2,
tlDataBeats, Some((gnt: GrantToDst) => gnt.hasMultibeatData())))
io.tl.grant <> gnt_arb.io.out
gnt_arb.io.in(0).valid := io.nasti.r.valid
io.nasti.r.ready := gnt_arb.io.in(0).ready
gnt_arb.io.in(0).bits := Grant(
is_builtin_type = Bool(true),
g_type = Mux(roq.io.deq.data.subblock,
Grant.getDataBeatType, Grant.getDataBlockType),
client_xact_id = get_id_mapper.io.resp.in_id,
manager_xact_id = UInt(0),
addr_beat = Mux(roq.io.deq.data.subblock, roq.io.deq.data.addr_beat, tl_cnt_in),
data = io.nasti.r.bits.data)
assert(!roq.io.deq.valid || roq.io.deq.matches,
"TL -> NASTI converter ReorderQueue: NASTI tag error")
assert(!gnt_arb.io.in(0).valid || get_id_mapper.io.resp.matches,
"TL -> NASTI ID Mapper: NASTI tag error")
gnt_arb.io.in(1).valid := io.nasti.b.valid
io.nasti.b.ready := gnt_arb.io.in(1).ready
gnt_arb.io.in(1).bits := Grant(
is_builtin_type = Bool(true),
g_type = Grant.putAckType,
client_xact_id = put_id_mapper.io.resp.in_id,
manager_xact_id = UInt(0),
addr_beat = UInt(0),
data = Bits(0))
assert(!gnt_arb.io.in(1).valid || put_id_mapper.io.resp.matches, "NASTI tag error")
// Non-zero NASTI response codes cannot be forwarded, so they are treated as fatal
assert(!io.nasti.r.valid || io.nasti.r.bits.resp === UInt(0), "NASTI read error")
assert(!io.nasti.b.valid || io.nasti.b.bits.resp === UInt(0), "NASTI write error")
}
/** Converts a NASTI slave port (io.nasti) into a TileLink client port (io.tl).
  *
  * Read addresses become Get/GetBlock Acquires. A write address is captured in
  * `aw_req`, after which each write-data beat becomes a Put/PutBlock Acquire.
  * Grants of type putAck are returned on the B channel, all others on the R channel.
  * Only single-beat transfers and full-block bursts are accepted (asserted below).
  */
class TileLinkIONastiIOConverter(implicit p: Parameters) extends TLModule()(p)
    with HasNastiParameters {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val tl = new ClientUncachedTileLinkIO
  }

  // s_idle: waiting for AR or AW; s_put: streaming write-data beats for a captured AW
  val (s_idle :: s_put :: Nil) = Enum(Bits(), 2)
  val state = Reg(init = s_idle)

  private val blockOffset = tlByteAddrBits + tlBeatAddrBits

  val aw_req = Reg(new NastiWriteAddressChannel)       // write address being serviced
  val w_tl_id = Reg(io.tl.acquire.bits.client_xact_id) // TileLink ID allocated for that write

  def is_singlebeat(chan: NastiAddressChannel): Bool =
    chan.len === UInt(0)

  // A full-block burst: tlDataBeats beats, each one full beat wide.
  // The size field is compared against log2Ceil(bytes) so that a one-byte beat
  // yields 0; log2Up would yield 1 there and disagree with the log2Ceil-based
  // size that NastiIOTileLinkIOConverter emits.
  def is_multibeat(chan: NastiAddressChannel): Bool =
    chan.len === UInt(tlDataBeats - 1) && chan.size === UInt(log2Ceil(tlDataBytes))

  def nasti_addr_block(chan: NastiAddressChannel): UInt =
    chan.addr(nastiXAddrBits - 1, blockOffset)

  def nasti_addr_beat(chan: NastiAddressChannel): UInt =
    chan.addr(blockOffset - 1, tlByteAddrBits)

  def nasti_addr_byte(chan: NastiAddressChannel): UInt =
    chan.addr(tlByteAddrBits - 1, 0)

  // A mask of (1 << size) one-bits, i.e. one bit per byte of the transfer
  def size_mask(size: UInt): UInt =
    (UInt(1) << (UInt(1) << size)) - UInt(1)

  // Write strobes restricted to the bytes addressed by the transfer
  def nasti_wmask(aw: NastiWriteAddressChannel, w: NastiWriteDataChannel): UInt = {
    val addr_byte = nasti_addr_byte(aw)
    w.strb & (size_mask(aw.size) << addr_byte)
  }

  def tl_last(gnt: GrantMetadata): Bool =
    !gnt.hasMultibeatData() || gnt.addr_beat === UInt(tlDataBeats - 1)

  def tl_b_grant(gnt: GrantMetadata): Bool =
    gnt.g_type === Grant.putAckType

  assert(!io.nasti.ar.valid ||
    is_singlebeat(io.nasti.ar.bits) || is_multibeat(io.nasti.ar.bits),
    "NASTI read transaction cannot convert to TileLInk")
  assert(!io.nasti.aw.valid ||
    is_singlebeat(io.nasti.aw.bits) || is_multibeat(io.nasti.aw.bits),
    "NASTI write transaction cannot convert to TileLInk")

  val put_count = Reg(init = UInt(0, tlBeatAddrBits)) // beat index within the current write

  // Mapping is forced so that every NASTI ID is translated through a table
  val get_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
  val put_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))

  // Capture the write address and switch to streaming its data
  when (io.nasti.aw.fire()) {
    aw_req := io.nasti.aw.bits
    w_tl_id := put_id_mapper.io.req.out_id
    state := s_put
  }

  when (io.nasti.w.fire()) {
    put_count := put_count + UInt(1)
    when (io.nasti.w.bits.last) {
      put_count := UInt(0)
      state := s_idle
    }
  }

  val get_acquire = Mux(is_multibeat(io.nasti.ar.bits),
    GetBlock(
      client_xact_id = get_id_mapper.io.req.out_id,
      addr_block = nasti_addr_block(io.nasti.ar.bits)),
    Get(
      client_xact_id = get_id_mapper.io.req.out_id,
      addr_block = nasti_addr_block(io.nasti.ar.bits),
      addr_beat = nasti_addr_beat(io.nasti.ar.bits),
      addr_byte = nasti_addr_byte(io.nasti.ar.bits),
      operand_size = io.nasti.ar.bits.size,
      alloc = Bool(false)))

  val put_acquire = Mux(is_multibeat(aw_req),
    PutBlock(
      client_xact_id = w_tl_id,
      addr_block = nasti_addr_block(aw_req),
      addr_beat = put_count,
      data = io.nasti.w.bits.data,
      wmask = Some(io.nasti.w.bits.strb)),
    Put(
      client_xact_id = w_tl_id,
      addr_block = nasti_addr_block(aw_req),
      addr_beat = nasti_addr_beat(aw_req),
      data = io.nasti.w.bits.data,
      wmask = Some(nasti_wmask(aw_req, io.nasti.w.bits))))

  val get_helper = DecoupledHelper(
    io.nasti.ar.valid,
    get_id_mapper.io.req.ready,
    io.tl.acquire.ready)

  get_id_mapper.io.req.valid := get_helper.fire(
    get_id_mapper.io.req.ready, state === s_idle)
  get_id_mapper.io.req.in_id := io.nasti.ar.bits.id
  get_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
  get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last

  // A write address is only accepted while idle and no read address is pending
  val aw_ok = (state === s_idle && !io.nasti.ar.valid)

  put_id_mapper.io.req.valid := aw_ok && io.nasti.aw.valid
  put_id_mapper.io.req.in_id := io.nasti.aw.bits.id
  put_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
  put_id_mapper.io.resp.valid := io.nasti.b.fire()

  io.tl.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
  io.tl.acquire.valid := get_helper.fire(io.tl.acquire.ready, state === s_idle) ||
                         (state === s_put && io.nasti.w.valid)

  io.nasti.ar.ready := get_helper.fire(io.nasti.ar.valid, state === s_idle)
  io.nasti.aw.ready := aw_ok && put_id_mapper.io.req.ready
  io.nasti.w.ready := (state === s_put && io.tl.acquire.ready)

  val nXacts = tlMaxClientXacts * tlMaxClientsPerPort

  // putAck Grants answer writes on the B channel
  io.nasti.b.valid := io.tl.grant.valid && tl_b_grant(io.tl.grant.bits)
  io.nasti.b.bits := NastiWriteResponseChannel(
    id = put_id_mapper.io.resp.in_id)
  assert(!io.nasti.b.valid || put_id_mapper.io.resp.matches,
    "Put ID does not match")

  // All other Grants return read data on the R channel
  io.nasti.r.valid := io.tl.grant.valid && !tl_b_grant(io.tl.grant.bits)
  io.nasti.r.bits := NastiReadDataChannel(
    id = get_id_mapper.io.resp.in_id,
    data = io.tl.grant.bits.data,
    last = tl_last(io.tl.grant.bits))
  assert(!io.nasti.r.valid || get_id_mapper.io.resp.matches,
    "Get ID does not match")

  io.tl.grant.ready := Mux(tl_b_grant(io.tl.grant.bits),
    io.nasti.b.ready, io.nasti.r.ready)
}

View File

@ -0,0 +1,32 @@
// See LICENSE for details
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import cde.Parameters
/** Convert TileLink protocol to Smi protocol */
class SmiIOTileLinkIOConverter(val dataWidth: Int, val addrWidth: Int)
                              (implicit p: Parameters) extends Module {
  val io = new Bundle {
    val tl = (new ClientUncachedTileLinkIO).flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }

  /** Joins two NASTI ports with a Queue on each of the five channels.
    * Address and write-data channels flow from `inner` to `outer`;
    * read-data and write-response channels flow from `outer` back to `inner`.
    */
  def decoupledNastiConnect(outer: NastiIO, inner: NastiIO): Unit = {
    // inner -> outer
    outer.ar <> Queue(inner.ar)
    outer.aw <> Queue(inner.aw)
    outer.w <> Queue(inner.w)
    // outer -> inner
    inner.r <> Queue(outer.r)
    inner.b <> Queue(outer.b)
  }

  // The conversion is done in two hops: TileLink -> NASTI -> SMI
  val tl2nasti = Module(new NastiIOTileLinkIOConverter())
  val nasti2smi = Module(new SmiIONastiIOConverter(dataWidth, addrWidth))

  tl2nasti.io.tl <> io.tl
  decoupledNastiConnect(nasti2smi.io.nasti, tl2nasti.io.nasti)
  io.smi <> nasti2smi.io.smi
}

View File

@ -0,0 +1,681 @@
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.util._
import uncore.constants._
import cde.Parameters
/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready high and release.valid low */
object TileLinkIOWrapper {
  // Ports that already have the full channel set are handed back untouched
  def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl
  def apply(tl: TileLinkIO): TileLinkIO = tl

  /** Feeds a client-side uncached port through a ClientTileLinkIOWrapper and returns its output. */
  def apply(tl: ClientUncachedTileLinkIO)(implicit p: Parameters): ClientTileLinkIO = {
    val wrapper = Module(new ClientTileLinkIOWrapper)
    wrapper.io.in <> tl
    wrapper.io.out
  }

  /** Feeds an uncached port through a TileLinkIOWrapper and returns its output. */
  def apply(tl: UncachedTileLinkIO)(implicit p: Parameters): TileLinkIO = {
    val wrapper = Module(new TileLinkIOWrapper)
    wrapper.io.in <> tl
    wrapper.io.out
  }
}
/** Presents an UncachedTileLinkIO as a full TileLinkIO.
  * Acquire, grant and finish are passed through; the probe and release
  * channels, which the uncached side lacks, are tied off.
  */
class TileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = new UncachedTileLinkIO().flip
    val out = new TileLinkIO
  }

  // Tie-offs: always accept probes, never send a release
  io.out.probe.ready := Bool(true)
  io.out.release.valid := Bool(false)

  // Pass-through channels
  io.out.acquire <> io.in.acquire
  io.out.finish <> io.in.finish
  io.in.grant <> io.out.grant
}
/** Presents a ClientUncachedTileLinkIO as a full ClientTileLinkIO.
  * Acquire and grant are passed through; the probe and release channels,
  * which the uncached side lacks, are tied off.
  */
class ClientTileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = new ClientUncachedTileLinkIO().flip
    val out = new ClientTileLinkIO
  }

  // Tie-offs: always accept probes, never send a release
  io.out.probe.ready := Bool(true)
  io.out.release.valid := Bool(false)

  // Pass-through channels
  io.out.acquire <> io.in.acquire
  io.in.grant <> io.out.grant
}
// Adapts a full ClientTileLinkIO (io.in) onto a ClientUncachedTileLinkIO (io.out).
// Acquires and Releases from io.in share the outgoing acquire channel through an
// arbiter. Two reorder queues, indexed by client_xact_id, remember what kind of
// Grant must be handed back for each outstanding transaction.
class ClientTileLinkIOUnwrapper(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = new ClientTileLinkIO().flip
val out = new ClientUncachedTileLinkIO
}
// Input 0 carries Acquires, input 1 carries Releases rewritten as PutBlocks
val acqArb = Module(new LockingRRArbiter(new Acquire, 2, tlDataBeats,
Some((acq: Acquire) => acq.hasMultibeatData())))
// acqRoq records whether each Acquire was a built-in type;
// relRoq records whether each Release was voluntary
val acqRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
val relRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
val iacq = io.in.acquire.bits
val irel = io.in.release.bits
val ognt = io.out.grant.bits
// Only the first beat of a message makes a reorder-queue entry
val acq_roq_enq = iacq.first()
val rel_roq_enq = irel.first()
val acq_roq_ready = !acq_roq_enq || acqRoq.io.enq.ready
val rel_roq_ready = !rel_roq_enq || relRoq.io.enq.ready
val acq_helper = DecoupledHelper(
io.in.acquire.valid,
acq_roq_ready,
acqArb.io.in(0).ready)
val rel_helper = DecoupledHelper(
io.in.release.valid,
rel_roq_ready,
acqArb.io.in(1).ready)
acqRoq.io.enq.valid := acq_helper.fire(acq_roq_ready, acq_roq_enq)
acqRoq.io.enq.bits.data := iacq.isBuiltInType()
acqRoq.io.enq.bits.tag := iacq.client_xact_id
acqArb.io.in(0).valid := acq_helper.fire(acqArb.io.in(0).ready)
// Every outgoing Acquire is built-in; a non-built-in one is sent as a getBlock
acqArb.io.in(0).bits := Acquire(
is_builtin_type = Bool(true),
a_type = Mux(iacq.isBuiltInType(),
iacq.a_type, Acquire.getBlockType),
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
addr_beat = iacq.addr_beat,
data = iacq.data,
union = iacq.union)
io.in.acquire.ready := acq_helper.fire(io.in.acquire.valid)
relRoq.io.enq.valid := rel_helper.fire(rel_roq_ready, rel_roq_enq)
relRoq.io.enq.bits.data := irel.isVoluntary()
relRoq.io.enq.bits.tag := irel.client_xact_id
acqArb.io.in(1).valid := rel_helper.fire(acqArb.io.in(1).ready)
// A Release is sent out as a PutBlock carrying the released data
acqArb.io.in(1).bits := PutBlock(
client_xact_id = irel.client_xact_id,
addr_block = irel.addr_block,
addr_beat = irel.addr_beat,
data = irel.data)
io.in.release.ready := rel_helper.fire(io.in.release.valid)
io.out.acquire <> acqArb.io.out
// Retire the reorder-queue entry on the last beat of the returning Grant;
// the acquire queue takes priority when its tag matches
val grant_deq_roq = io.out.grant.fire() && ognt.last()
acqRoq.io.deq.valid := acqRoq.io.deq.matches && grant_deq_roq
acqRoq.io.deq.tag := ognt.client_xact_id
relRoq.io.deq.valid := !acqRoq.io.deq.matches && grant_deq_roq
relRoq.io.deq.tag := ognt.client_xact_id
assert(!grant_deq_roq || acqRoq.io.deq.matches || relRoq.io.deq.matches,
"TileLink Unwrapper: client_xact_id mismatch")
val gnt_builtin = acqRoq.io.deq.data
val gnt_voluntary = relRoq.io.deq.data // NOTE(review): not referenced below in this class
// Grant for an Acquire: restore the non-built-in grant type when the Acquire was not built-in
val acq_grant = Grant(
is_builtin_type = gnt_builtin,
g_type = Mux(gnt_builtin, ognt.g_type, tlCoh.getExclusiveGrantType),
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat,
data = ognt.data)
assert(!io.in.release.valid || io.in.release.bits.isVoluntary(), "Unwrapper can only process voluntary releases.")
// Grant for a Release: always a voluntary acknowledgement
val rel_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.voluntaryAckType, // We should only ever be working with voluntary releases
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat,
data = ognt.data)
io.in.grant.valid := io.out.grant.valid
io.in.grant.bits := Mux(acqRoq.io.deq.matches, acq_grant, rel_grant)
io.out.grant.ready := io.in.grant.ready
// The uncached side has no probe channel, so no probe is ever sent to the client
io.in.probe.valid := Bool(false)
}
object TileLinkWidthAdapter {
  /** Returns a port whose beat width matches the TileLink configuration named
    * `outerId`: `in` itself when the widths already agree, otherwise the output
    * of a TileLinkIOWidener or TileLinkIONarrower fed by `in`.
    */
  def apply(in: ClientUncachedTileLinkIO, outerId: String)(implicit p: Parameters) = {
    val outerBits = p(TLKey(outerId)).dataBitsPerBeat
    val innerBits = in.tlDataBits
    if (outerBits == innerBits) {
      in
    } else if (outerBits > innerBits) {
      val adapter = Module(new TileLinkIOWidener(in.p(TLId), outerId))
      adapter.io.in <> in
      adapter.io.out
    } else {
      val adapter = Module(new TileLinkIONarrower(in.p(TLId), outerId))
      adapter.io.in <> in
      adapter.io.out
    }
  }

  /** Connects `in` to `out`, inserting a width adapter when needed.
    * Both ports must carry the same number of data bits per block.
    */
  def apply(out: ClientUncachedTileLinkIO, in: ClientUncachedTileLinkIO)(implicit p: Parameters): Unit = {
    require(out.tlDataBits * out.tlDataBeats == in.tlDataBits * in.tlDataBeats)
    val adapted = apply(in, out.p(TLId))
    out <> adapted
  }
}
/** Converts an uncached TileLink port with narrow beats (inner) into one with
  * wider beats (outer). A block holds the same number of bits on both sides,
  * so `factor` inner beats make up one outer beat.
  *
  * Acquire direction:
  *  - PutBlock: `factor` inner beats are collected into `put_data`/`put_wmask`
  *    and then issued as a single outer PutBlock beat.
  *  - Put: data and write mask are shifted into the lane selected by the
  *    address bits between the inner and outer byte offsets.
  *  - Get: forwarded with the address re-split for the outer geometry; the
  *    lane-select bits are remembered per client transaction id.
  *  - anything else is forwarded as a GetBlock.
  *
  * Grant direction:
  *  - GetBlock data: each outer beat is latched and replayed as `factor`
  *    inner beats.
  *  - Get data: the remembered lane is selected out of the outer beat.
  *  - other grants pass through with unchanged fields.
  *
  * @param innerTLId TLId of the narrow (client-facing) configuration
  * @param outerTLId TLId of the wide (manager-facing) configuration
  */
class TileLinkIOWidener(innerTLId: String, outerTLId: String)
    (implicit p: Parameters) extends TLModule()(p) {

  val paddrBits = p(PAddrBits)
  val innerParams = p(TLKey(innerTLId))
  val outerParams = p(TLKey(outerTLId))
  val innerDataBeats = innerParams.dataBeats
  val innerDataBits = innerParams.dataBitsPerBeat
  val innerWriteMaskBits = innerParams.writeMaskBits
  val innerByteAddrBits = log2Up(innerWriteMaskBits)
  val innerMaxXacts = innerParams.maxClientXacts * innerParams.maxClientsPerPort
  val innerXactIdBits = log2Up(innerMaxXacts)
  val outerDataBeats = outerParams.dataBeats
  val outerDataBits = outerParams.dataBitsPerBeat
  val outerWriteMaskBits = outerParams.writeMaskBits
  val outerByteAddrBits = log2Up(outerWriteMaskBits)
  val outerBeatAddrBits = log2Up(outerDataBeats)
  val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
  val outerMaxClients = outerParams.maxClientsPerPort
  val outerClientIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
  val outerManagerIdBits = log2Up(outerParams.maxManagerXacts)
  val outerBlockAddrBits = paddrBits - outerBlockOffset

  // The outer side must have fewer, wider beats covering the same block size.
  require(outerDataBeats <= innerDataBeats)
  require(outerDataBits >= innerDataBits)
  require(outerDataBits % innerDataBits == 0)
  require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)

  // Number of inner beats packed into one outer beat.
  val factor = innerDataBeats / outerDataBeats

  val io = new Bundle {
    val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
    val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
  }

  val iacq = io.in.acquire.bits
  val oacq = io.out.acquire.bits
  val ognt = io.out.grant.bits
  val ignt = io.in.grant.bits

  // Message classification.
  val shrink = iacq.a_type === Acquire.putBlockType      // many inner beats -> one outer beat
  val stretch = ognt.g_type === Grant.getDataBlockType   // one outer beat -> many inner beats
  val smallget = iacq.a_type === Acquire.getType
  val smallput = iacq.a_type === Acquire.putType
  val smallgnt = ognt.g_type === Grant.getDataBeatType

  // PutBlock collection state.
  val sending_put = Reg(init = Bool(false))  // a full outer beat is buffered and waiting to go out
  val collecting = Reg(init = Bool(false))   // block-level fields below have been captured
  val put_block = Reg(UInt(width = outerBlockAddrBits))
  val put_id = Reg(UInt(width = outerClientIdBits))
  val put_data = Reg(Vec(factor, UInt(width = innerDataBits)))
  val put_wmask = Reg(Vec(factor, UInt(width = innerWriteMaskBits)))
  val put_allocate = Reg(Bool())

  // put_beat counts outer beats sent; recv_idx counts inner beats received
  // within the outer beat currently being assembled.
  val (put_beat, put_done) = Counter(io.out.acquire.fire() && oacq.hasMultibeatData(), outerDataBeats)
  val (recv_idx, recv_done) = Counter(io.in.acquire.fire() && iacq.hasMultibeatData(), factor)

  // Re-split the full inner address according to the outer geometry.
  val in_addr = iacq.full_addr()
  val out_addr_block = in_addr(paddrBits - 1, outerBlockOffset)
  val out_addr_beat = in_addr(outerBlockOffset - 1, outerByteAddrBits)
  val out_addr_byte = in_addr(outerByteAddrBits - 1, 0)

  // Which inner-width lane of the outer beat this access targets.
  val switch_addr = in_addr(outerByteAddrBits - 1, innerByteAddrBits)
  // Lane remembered per inner transaction id, consulted when the Get grant returns.
  val smallget_switch = Reg(Vec(innerMaxXacts, switch_addr))

  /** Shifts inner-width `data` up into lane `addr` of an outer beat. */
  def align_data(addr: UInt, data: UInt): UInt =
    data << Cat(addr, UInt(0, log2Up(innerDataBits)))

  /** Shifts an inner-width write mask up into lane `addr` of an outer mask. */
  def align_wmask(addr: UInt, wmask: UInt): UInt =
    wmask << Cat(addr, UInt(0, log2Up(innerWriteMaskBits)))

  val outerConfig = p.alterPartial({ case TLId => outerTLId })

  val get_acquire = Get(
    client_xact_id = iacq.client_xact_id,
    addr_block = out_addr_block,
    addr_beat = out_addr_beat,
    addr_byte = out_addr_byte,
    operand_size = iacq.op_size(),
    alloc = iacq.allocate())(outerConfig)

  val get_block_acquire = GetBlock(
    client_xact_id = iacq.client_xact_id,
    addr_block = out_addr_block,
    alloc = iacq.allocate())(outerConfig)

  val put_acquire = Put(
    client_xact_id = iacq.client_xact_id,
    addr_block = out_addr_block,
    addr_beat = out_addr_beat,
    data = align_data(switch_addr, iacq.data),
    wmask = Some(align_wmask(switch_addr, iacq.wmask())),
    alloc = iacq.allocate())(outerConfig)

  // Built from the buffered beat. The allocation hint captured with the first
  // inner beat is forwarded so the merged PutBlock carries the client's hint,
  // as the other acquire kinds above already do.
  val put_block_acquire = PutBlock(
    client_xact_id = put_id,
    addr_block = put_block,
    addr_beat = put_beat,
    data = put_data.asUInt,
    wmask = Some(put_wmask.asUInt),
    alloc = put_allocate)(outerConfig)

  // PutBlock beats are absorbed locally (never passed straight through);
  // everything else is forwarded combinationally.
  io.out.acquire.valid := sending_put || (!shrink && io.in.acquire.valid)
  io.out.acquire.bits := MuxCase(get_block_acquire, Seq(
    sending_put -> put_block_acquire,
    smallget -> get_acquire,
    smallput -> put_acquire))
  io.in.acquire.ready := !sending_put && (shrink || io.out.acquire.ready)

  when (io.in.acquire.fire() && shrink) {
    when (!collecting) {
      // First inner beat of a block: capture the block-level fields once.
      put_block := out_addr_block
      put_id := iacq.client_xact_id
      put_allocate := iacq.allocate()
      collecting := Bool(true)
    }
    put_data(recv_idx) := iacq.data
    put_wmask(recv_idx) := iacq.wmask()
  }

  when (io.in.acquire.fire() && smallget) {
    smallget_switch(iacq.client_xact_id) := switch_addr
  }

  // A full outer beat has been gathered: send it, then resume collecting.
  when (recv_done) { sending_put := Bool(true) }
  when (sending_put && io.out.acquire.ready) { sending_put := Bool(false) }
  when (put_done) { collecting := Bool(false) }

  // GetBlock replay state: one latched outer beat is returned as `factor`
  // inner beats, indexed by send_idx.
  val returning_data = Reg(init = Bool(false))
  val (send_idx, send_done) = Counter(
    io.in.grant.ready && returning_data, factor)

  val gnt_beat = Reg(UInt(width = outerBeatAddrBits))
  val gnt_client_id = Reg(UInt(width = outerClientIdBits))
  val gnt_manager_id = Reg(UInt(width = outerManagerIdBits))
  val gnt_data = Reg(UInt(width = outerDataBits))

  when (io.out.grant.fire() && stretch) {
    gnt_data := ognt.data
    gnt_client_id := ognt.client_xact_id
    gnt_manager_id := ognt.manager_xact_id
    gnt_beat := ognt.addr_beat
    returning_data := Bool(true)
  }

  when (send_done) { returning_data := Bool(false) }

  /** Shifts lane `sel` of an outer beat down to the low bits. */
  def select_data(data: UInt, sel: UInt): UInt =
    data >> (sel << log2Up(innerDataBits))

  val gnt_switch = smallget_switch(ognt.client_xact_id)

  val innerConfig = p.alterPartial({ case TLId => innerTLId })

  val get_block_grant = Grant(
    is_builtin_type = Bool(true),
    g_type = Grant.getDataBlockType,
    client_xact_id = gnt_client_id,
    manager_xact_id = gnt_manager_id,
    addr_beat = Cat(gnt_beat, send_idx),
    data = select_data(gnt_data, send_idx))(innerConfig)

  val get_grant = Grant(
    is_builtin_type = Bool(true),
    g_type = Grant.getDataBeatType,
    client_xact_id = ognt.client_xact_id,
    manager_xact_id = ognt.manager_xact_id,
    addr_beat = Cat(ognt.addr_beat, gnt_switch),
    data = select_data(ognt.data, gnt_switch))(innerConfig)

  val default_grant = Grant(
    is_builtin_type = Bool(true),
    g_type = ognt.g_type,
    client_xact_id = ognt.client_xact_id,
    manager_xact_id = ognt.manager_xact_id,
    addr_beat = ognt.addr_beat,
    data = ognt.data)(innerConfig)

  // GetBlock data is latched first and replayed from the buffer; every other
  // grant is forwarded combinationally.
  io.in.grant.valid := returning_data || (!stretch && io.out.grant.valid)
  io.in.grant.bits := MuxCase(default_grant, Seq(
    returning_data -> get_block_grant,
    smallgnt -> get_grant))
  io.out.grant.ready := !returning_data && (stretch || io.in.grant.ready)
}
/** Converts an uncached TileLink port with wide beats (inner) into one with
  * narrower beats (outer). A block holds the same number of bits on both
  * sides, so each inner beat corresponds to `factor` outer beats.
  *
  * Acquire direction: a PutBlock beat is buffered and sent as `factor` outer
  * beats; GetBlock, Get and Put are rebuilt for the outer configuration and
  * forwarded in the same cycle.
  * Grant direction: multibeat grants are gathered `factor` at a time into one
  * inner beat; single-beat Get data is replicated across the inner beat.
  *
  * @param innerTLId TLId of the wide (client-facing) configuration
  * @param outerTLId TLId of the narrow (manager-facing) configuration
  */
class TileLinkIONarrower(innerTLId: String, outerTLId: String)
(implicit p: Parameters) extends TLModule()(p) {
val innerParams = p(TLKey(innerTLId))
val outerParams = p(TLKey(outerTLId))
val innerDataBeats = innerParams.dataBeats
val innerDataBits = innerParams.dataBitsPerBeat
val innerWriteMaskBits = innerParams.writeMaskBits
val innerByteAddrBits = log2Up(innerWriteMaskBits)
val outerDataBeats = outerParams.dataBeats
val outerDataBits = outerParams.dataBitsPerBeat
val outerWriteMaskBits = outerParams.writeMaskBits
val outerByteAddrBits = log2Up(outerWriteMaskBits)
val outerBeatAddrBits = log2Up(outerDataBeats)
val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
val outerMaxClients = outerParams.maxClientsPerPort
val outerIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
// The outer side must have strictly more, strictly narrower beats that cover
// the same block size.
require(outerDataBeats > innerDataBeats)
require(outerDataBeats % innerDataBeats == 0)
require(outerDataBits < innerDataBits)
require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
// Number of outer beats per inner beat.
val factor = outerDataBeats / innerDataBeats
val io = new Bundle {
val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
}
val iacq = io.in.acquire.bits
val ognt = io.out.grant.bits
// Acquire classification. `stretch`: one inner PutBlock beat becomes several
// outer beats. `shrink`: a GetBlock whose grant beats are merged on return.
val stretch = iacq.a_type === Acquire.putBlockType
val shrink = iacq.a_type === Acquire.getBlockType
val smallput = iacq.a_type === Acquire.putType
val smallget = iacq.a_type === Acquire.getType
// Buffer for the inner PutBlock beat currently being emitted piecewise.
val acq_data_buffer = Reg(UInt(width = innerDataBits))
val acq_wmask_buffer = Reg(UInt(width = innerWriteMaskBits))
val acq_client_id = Reg(iacq.client_xact_id)
val acq_addr_block = Reg(iacq.addr_block)
val acq_addr_beat = Reg(iacq.addr_beat)
// Index of the outer beat within the buffered inner beat.
val oacq_ctr = Counter(factor)
// Re-split the full inner address according to the outer geometry.
val outer_beat_addr = iacq.full_addr()(outerBlockOffset - 1, outerByteAddrBits)
val outer_byte_addr = iacq.full_addr()(outerByteAddrBits - 1, 0)
// Slice the inner write mask and data into `factor` outer-width chunks.
val mask_chunks = Vec.tabulate(factor) { i =>
val lsb = i * outerWriteMaskBits
val msb = (i + 1) * outerWriteMaskBits - 1
iacq.wmask()(msb, lsb)
}
val data_chunks = Vec.tabulate(factor) { i =>
val lsb = i * outerDataBits
val msb = (i + 1) * outerDataBits - 1
iacq.data(msb, lsb)
}
// Bit i is set when chunk i has any byte enabled. The list is reversed
// because Cat places its first argument in the most-significant position.
val beat_sel = Cat(mask_chunks.map(mask => mask.orR).reverse)
// The Mux1H selections rely on at most one chunk being active, which is what
// the assertion below checks for single-beat Puts.
val smallput_data = Mux1H(beat_sel, data_chunks)
val smallput_wmask = Mux1H(beat_sel, mask_chunks)
val smallput_beat = Cat(iacq.addr_beat, PriorityEncoder(beat_sel))
assert(!io.in.acquire.valid || !smallput || PopCount(beat_sel) <= UInt(1),
"Can't perform Put wider than outer width")
// op_size() is compared against log2 of the outer beat size in bytes.
val read_size_ok = iacq.op_size() <= UInt(log2Ceil(outerDataBits / 8))
assert(!io.in.acquire.valid || !smallget || read_size_ok,
"Can't perform Get wider than outer width")
val outerConfig = p.alterPartial({ case TLId => outerTLId })
val innerConfig = p.alterPartial({ case TLId => innerTLId })
val get_block_acquire = GetBlock(
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
alloc = iacq.allocate())(outerConfig)
// Built from the buffered beat; the low bits of each buffer hold the chunk
// to send next because the buffers are shifted right after every outer beat.
// NOTE(review): neither this PutBlock nor put_acquire below passes an `alloc`
// argument, unlike the Get/GetBlock builders here - confirm that is intended.
val put_block_acquire = PutBlock(
client_xact_id = acq_client_id,
addr_block = acq_addr_block,
addr_beat = if (factor > 1)
Cat(acq_addr_beat, oacq_ctr.value)
else acq_addr_beat,
data = acq_data_buffer(outerDataBits - 1, 0),
wmask = Some(acq_wmask_buffer(outerWriteMaskBits - 1, 0)))(outerConfig)
val get_acquire = Get(
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
addr_beat = outer_beat_addr,
addr_byte = outer_byte_addr,
operand_size = iacq.op_size(),
alloc = iacq.allocate())(outerConfig)
val put_acquire = Put(
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
addr_beat = smallput_beat,
data = smallput_data,
wmask = Some(smallput_wmask))(outerConfig)
// Set while a buffered PutBlock beat is being emitted as outer beats.
val sending_put = Reg(init = Bool(false))
val pass_valid = io.in.acquire.valid && !stretch
// MuxCase picks the first true condition; acquires matching none of them are
// forwarded with the inner fields copied as-is.
io.out.acquire.bits := MuxCase(Wire(io.out.acquire.bits, init=iacq), Seq(
(sending_put, put_block_acquire),
(shrink, get_block_acquire),
(smallput, put_acquire),
(smallget, get_acquire)))
io.out.acquire.valid := sending_put || pass_valid
// PutBlock beats are accepted into the buffer without waiting for the outer
// port; everything else needs the outer port to be ready.
io.in.acquire.ready := !sending_put && (stretch || io.out.acquire.ready)
when (io.in.acquire.fire() && stretch) {
acq_data_buffer := iacq.data
acq_wmask_buffer := iacq.wmask()
acq_client_id := iacq.client_xact_id
acq_addr_block := iacq.addr_block
acq_addr_beat := iacq.addr_beat
sending_put := Bool(true)
}
when (sending_put && io.out.acquire.ready) {
// Move the next chunk into the low bits; stop after `factor` outer beats.
acq_data_buffer := acq_data_buffer >> outerDataBits
acq_wmask_buffer := acq_wmask_buffer >> outerWriteMaskBits
when (oacq_ctr.inc()) { sending_put := Bool(false) }
}
// Grant side: multibeat grants are gathered, others are forwarded directly.
val ognt_block = ognt.hasMultibeatData()
val gnt_data_buffer = Reg(Vec(factor, UInt(width = outerDataBits)))
val gnt_client_id = Reg(ognt.client_xact_id)
val gnt_manager_id = Reg(ognt.manager_xact_id)
// ignt_ctr numbers the inner beats returned; ognt_ctr indexes the outer beat
// within the inner beat being assembled.
val ignt_ctr = Counter(innerDataBeats)
val ognt_ctr = Counter(factor)
// Set while a fully assembled inner beat is waiting to be accepted.
val sending_get = Reg(init = Bool(false))
val get_block_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.getDataBlockType,
client_xact_id = gnt_client_id,
manager_xact_id = gnt_manager_id,
addr_beat = ignt_ctr.value,
data = gnt_data_buffer.asUInt)(innerConfig)
val smallget_grant = ognt.g_type === Grant.getDataBeatType
// Single-beat Get response: the outer beat index is scaled down to an inner
// beat index and the narrow data is replicated across the wide beat.
val get_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.getDataBeatType,
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat >> UInt(log2Up(factor)),
data = Fill(factor, ognt.data))(innerConfig)
io.in.grant.valid := sending_get || (io.out.grant.valid && !ognt_block)
io.out.grant.ready := !sending_get && (ognt_block || io.in.grant.ready)
io.in.grant.bits := MuxCase(Wire(io.in.grant.bits, init=ognt), Seq(
sending_get -> get_block_grant,
smallget_grant -> get_grant))
// With the ready expression above, this condition holds exactly when a
// multibeat outer grant beat is being accepted.
when (io.out.grant.valid && ognt_block && !sending_get) {
gnt_data_buffer(ognt_ctr.value) := ognt.data
when (ognt_ctr.inc()) {
gnt_client_id := ognt.client_xact_id
gnt_manager_id := ognt.manager_xact_id
sending_get := Bool(true)
}
}
when (io.in.grant.ready && sending_get) {
ignt_ctr.inc()
sending_get := Bool(false)
}
}
/** Acquire-side half of the fragmenter.
  *
  * Each accepted acquire is held in a one-entry pipeline register (`acq_bits`)
  * and presented on `out`. PutBlock and GetBlock acquires are re-typed as Put
  * and Get. A GetBlock is replayed with an incrementing `addr_beat` until
  * `acq_last_beat` is reached; every other acquire is sent exactly once.
  *
  * When the first beat of a request is accepted, a count is pushed onto `que`:
  * all ones for a PutBlock or GetBlock, zero otherwise.
  */
class TileLinkFragmenterSource(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = Decoupled(new Acquire).flip
val out = Decoupled(new Acquire)
val que = Decoupled(UInt(width = tlBeatAddrBits))
}
// Pipeline stage with acquire data; needed to ensure in.bits stay fixed when !in.ready
val acq_valid = RegInit(Bool(false))
val acq_bits = Reg(new Acquire)
// The last beat of the generated acquire to send
val acq_last_beat = Reg(UInt(width = tlBeatAddrBits))
val acq_last = acq_bits.addr_beat === acq_last_beat
// 'in' has the first beat?
val in_multi_put = io.in.bits.isBuiltInType(Acquire.putBlockType)
val in_multi_get = io.in.bits.isBuiltInType(Acquire.getBlockType)
// Only a PutBlock arrives as several input beats; anything else is always a first beat.
val in_first_beat = !in_multi_put || io.in.bits.addr_beat === UInt(0)
// Move stuff from acq to out whenever out is ready
io.out.valid := acq_valid
// When can acq accept a request?
val acq_ready = !acq_valid || (acq_last && io.out.ready)
// Move the first beat from in to acq only when both acq and que are ready
io.in.ready := (!in_first_beat || io.que.ready) && acq_ready
io.que.valid := (in_first_beat && io.in.valid) && acq_ready
// in.fire moves data from in to acq and (optionally) que
// out.fire moves data from acq to out
// Desired flow control results:
assert (!io.que.fire() || io.in.fire()) // 1. que.fire => in.fire
assert (!(io.in.fire() && in_first_beat) || io.que.fire()) // 2. in.fire && in_first_beat => que.fire
assert (!io.out.fire() || acq_valid) // 3. out.fire => acq_valid
assert (!io.in.fire() || (!acq_valid || (io.out.fire() && acq_last))) // 4. in.fire => !acq_valid || (out.fire && acq_last)
// Proofs:
// 1. que.fire => que.ready && in.valid && acq_ready => in.ready && in.valid
// 2. in.fire && in_first_beat => in.valid && acq_ready && [(!in_first_beat || que.ready) && in_first_beat] =>
// in.valid && acq_ready && que.ready && in_first_beat => que.valid && que.ready
// 3. out.fire => out.valid => acq_valid
// 4. in.fire => acq_ready => !acq_valid || (acq_last && out.ready) =>
// !acq_valid || (acq_valid && acq_last && out.ready) => !acq_valid || (acq_last && out.fire)
// All-ones beat index, i.e. (beats per block - 1) when the beat count is a power of two.
val multi_size = SInt(-1, width = tlBeatAddrBits).asUInt // TL2: use in.bits.size()/beatBits-1
// Value pushed onto que for this request.
val in_sizeMinus1 = Mux(in_multi_get || in_multi_put, multi_size, UInt(0))
// Extra beats this stage must generate itself: only a GetBlock is replayed,
// since a PutBlock already arrives one beat at a time.
val in_insertSizeMinus1 = Mux(in_multi_get, multi_size, UInt(0))
when (io.in.fire()) {
// Theorem 4 makes this safe; we overwrite garbage, or replace the final acq
acq_valid := Bool(true)
acq_bits := io.in.bits
acq_last_beat := io.in.bits.addr_beat + in_insertSizeMinus1
// Replace this with size truncation in TL2:
acq_bits.a_type := Mux(in_multi_put, Acquire.putType, Mux(in_multi_get, Acquire.getType, io.in.bits.a_type))
} .elsewhen (io.out.fire()) {
acq_valid := !acq_last // false => !in.valid || (!que.ready && in_first_beat)
acq_bits.addr_beat := acq_bits.addr_beat + UInt(1)
// acq_last && out.fire => acq_last && out.ready && acq_valid => acq_ready
// Suppose in.valid, then !in.fire => !in.ready => !(!in_first_beat || que.ready) => !que.ready && in_first_beat
}
// Safe by theorem 3
io.out.bits := acq_bits
// Safe by theorem 1
io.que.bits := in_sizeMinus1
}
/** Grant-side half of the fragmenter.
  *
  * For every request, a count is taken from `que` giving the number of
  * further grants to expect after the first. Incoming grants decrement that
  * count. Get data grants are always forwarded; any other grant is forwarded
  * only when it is the last one for the request, so non-final put
  * acknowledgements are consumed and dropped.
  *
  * If the count taken from `que` was non-zero, forwarded grants are re-typed
  * as `getDataBlockType` (for Get data) or `putAckType` (otherwise).
  */
class TileLinkFragmenterSink(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = Decoupled(new Grant).flip
    val out = Decoupled(new Grant)
    val que = Decoupled(UInt(width = tlBeatAddrBits)).flip
  }

  // count_bits is only meaningful while count_valid is set.
  val count_valid = RegInit(Bool(false))
  // Remembers whether the current request was queued with a non-zero count.
  val multi_op = Reg(Bool())
  val count_bits = Reg(UInt(width = tlBeatAddrBits))
  val last = count_bits === UInt(0)

  val in_put = io.in.bits.isBuiltInType(Grant.putAckType)
  val in_get = io.in.bits.isBuiltInType(Grant.getDataBeatType)
  val deliver = last || in_get

  // Accept the input, discarding the non-final put grant
  io.in.ready := count_valid && (io.out.ready || !deliver)
  // Output the grant whenever we want delivery
  io.out.valid := count_valid && io.in.valid && deliver
  // Take a new number whenever we deliver the last beat
  io.que.ready := !count_valid || (io.in.valid && io.out.ready && last)

  // Desired flow control results:
  assert (!io.out.fire() || (count_valid && io.in.fire())) // 1. out.fire => in.fire && count_valid
  assert (!(io.in.fire() && deliver) || io.out.fire()) // 2. in.fire && deliver => out.fire
  assert (!(io.out.fire() && last) || io.que.ready) // 3. out.fire && last => que.ready
  // Checks the full invariant proven below, including `last`, since the
  // count register is only safe to overwrite after the final grant.
  assert (!io.que.fire() || (!count_valid || (io.out.fire() && last))) // 4. que.fire => !count_valid || (out.fire && last)
  // Proofs:
  // 1. out.fire => out.ready && (count_valid && in.valid && deliver) => (count_valid && out.ready) && in.valid => in.fire
  // 2. in.fire && deliver => in.valid && count_valid && [(out.ready || !deliver) && deliver] =>
  //    in.valid && count_valid && deliver && out.ready => out.fire
  // 3. out.fire && last => out.valid && out.ready && last => in.valid && out.ready && last => que.ready
  // 4. que.fire => que.valid && (!count_valid || (in.valid && out.ready && last))
  //    => !count_valid || (count_valid && in.valid && out.ready && [last => deliver])
  //    => !count_valid || (out.valid && out.ready && last)

  when (io.que.fire()) {
    // Theorem 4 makes this safe; we overwrite garbage or last output
    count_valid := Bool(true)
    count_bits := io.que.bits
    multi_op := io.que.bits =/= UInt(0)
  } .elsewhen (io.in.fire()) {
    count_valid := !last // false => !que.valid
    count_bits := count_bits - UInt(1)
    // Proof: in.fire && [last => deliver] =2=> out.fire && last =3=> que.ready
    // !que.fire && que.ready => !que.valid
  }

  // Safe by Theorem 1
  io.out.bits := io.in.bits
  // Present fragmented operations to the client as the block-level grant type.
  io.out.bits.g_type := Mux(multi_op, Mux(in_get, Grant.getDataBlockType, Grant.putAckType), io.in.bits.g_type)
}
/** Pairs a [[TileLinkFragmenterSource]] on the acquire channel with a
  * [[TileLinkFragmenterSink]] on the grant channel, linked by a queue that
  * carries one count per request from the source to the sink.
  *
  * @param depth number of entries in the source-to-sink queue
  */
class TileLinkFragmenter(depth: Int = 1)(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = new ClientUncachedTileLinkIO().flip
    val out = new ClientUncachedTileLinkIO
  }

  // TL2:
  // supportsAcquire = false
  // modify all outward managers to supportsMultibeat = true
  // assert: all managers must behaveFIFO (not inspect duplicated id field)

  val source = Module(new TileLinkFragmenterSource)
  val sink = Module(new TileLinkFragmenterSink)

  // Acquire channel: client -> source -> manager.
  source.io.in <> io.in.acquire
  io.out.acquire <> source.io.out

  // Grant channel: manager -> sink -> client.
  sink.io.in <> io.out.grant
  io.in.grant <> sink.io.out

  // Per-request counts produced by the source are consumed by the sink.
  sink.io.que <> Queue(source.io.que, depth)
}
/** Convenience constructor for [[TileLinkFragmenter]]. */
object TileLinkFragmenter {
  /** Places a fragmenter after `source`.
    *
    * @param source client port whose traffic should be fragmented
    * @param depth  queue depth handed to the fragmenter
    * @return the fragmenter's outward-facing port
    */
  def apply(source: ClientUncachedTileLinkIO, depth: Int = 1)(implicit p: Parameters): ClientUncachedTileLinkIO = {
    val frag = Module(new TileLinkFragmenter(depth))
    frag.io.in <> source
    frag.io.out
  }
}