package uncore import Chisel._ import junctions._ import cde.{Parameters, Field} import HastiConstants._ /* We need to translate TileLink requests into operations we can actually execute on AHB. * The general plan of attack is: * get => one AHB=>TL read * put => [multiple AHB write fragments=>nill], one AHB write=>TL * getBlock => AHB burst reads =>TL * putBlock => AHB burst writes=>TL * getPrefetch => noop=>TL * putPrefetch => noop=>TL * putAtomic => one AHB=>TL read, one idle, one AHB atom_write=>nill, one idle * * This requires that we support a pipeline of optional AHB requests with optional TL responses */ class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO with HasGrantType with HasClientTransactionId with HasTileLinkBeatId { val executeAHB = Bool() val respondTL = Bool() val latchAtom = Bool() val firstBurst = Bool() val finalBurst = Bool() val cmd = Bits(width = M_SZ) // atomic op } // AHB stage1: translate TileLink Acquires into AHBRequests class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module with HasHastiParameters with HasTileLinkParameters with HasAddrMapParameters { val io = new Bundle { val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe val request = new DecoupledIO(new AHBRequestIO) } // Match the AHB burst with a TileLink {Put,Get}Block val burstSize = tlDataBeats match { case 1 => HBURST_SINGLE // case 2 not supported by AHB case 4 => HBURST_WRAP4 case 8 => HBURST_WRAP8 case 16 => HBURST_WRAP16 case _ => throw new java.lang.AssertionError("TileLink beats unsupported by AHB") } // Bursts start at 0 and wrap-around back to 0 val finalBurst = UInt(tlDataBeats-1, width = log2Up(tlDataBeats)).asUInt val firstBurst = UInt(0, width = log2Up(tlDataBeats)) val next_wmask = Wire(UInt(width = tlDataBytes)) // calculated below // State variables for processing more complicated TileLink Acquires val s_atom_r :: s_atom_idle1 :: s_atom_w :: s_atom_idle2 :: Nil = Enum(UInt(), 4) val atom_state = Reg(init = s_atom_r) // never changes if !supportAtomics val done_wmask = Reg(init = UInt(0, width = tlDataBytes)) val burst = Reg(init = firstBurst) // Grab some view of the TileLink acquire val acq_wmask = io.acquire.bits.wmask() val isReadBurst = io.acquire.bits.is(Acquire.getBlockType) val isWriteBurst = io.acquire.bits.is(Acquire.putBlockType) val isBurst = isWriteBurst || isReadBurst val isAtomic = io.acquire.bits.is(Acquire.putAtomicType) && Bool(supportAtomics) val isPut = io.acquire.bits.is(Acquire.putType) // Final states? val last_wmask = next_wmask === acq_wmask val last_atom = atom_state === s_atom_idle2 val last_burst = burst === finalBurst // Block the incoming request until we've fully consumed it // NOTE: the outgoing grant.valid may happen while acquire.ready is still false; // for this reason it is essential to have a Queue or a Pipe infront of acquire io.acquire.ready := io.request.ready && MuxLookup(io.acquire.bits.a_type, Bool(true), Array( Acquire.getType -> Bool(true), Acquire.getBlockType -> last_burst, // hold it until the last beat is burst Acquire.putType -> last_wmask, // only accept the put if we can fully consume its wmask Acquire.putBlockType -> Bool(true), Acquire.putAtomicType -> last_atom, // atomic operation stages complete Acquire.getPrefetchType -> Bool(true), Acquire.putPrefetchType -> Bool(true))) // Advance the fragment state when (io.request.ready && io.acquire.valid && isPut) { when (last_wmask) { // if this was the last fragment, restart FSM done_wmask := UInt(0) } .otherwise { done_wmask := next_wmask } } // Advance the burst state // We assume here that TileLink gives us all putBlock beats with nothing between them when (io.request.ready && io.acquire.valid && isBurst) { when (last_burst) { burst := UInt(0) } .otherwise { burst := burst + UInt(1) } } // Advance the atomic state machine when (io.request.ready && io.acquire.valid && isAtomic) { switch (atom_state) { is (s_atom_r) { atom_state := s_atom_idle1 } is (s_atom_idle1) { atom_state := s_atom_w } // idle1 => AMOALU runs on a different clock than AHB slave read is (s_atom_w) { atom_state := s_atom_idle2 } is (s_atom_idle2) { atom_state := s_atom_r } // idle2 state is required by AHB after hmastlock is lowered } } // Returns (range=0, range=-1, aligned_wmask, size) def mask_helper(in_0 : Bool, range : UInt): (Bool, Bool, UInt, UInt) = { val len = range.getWidth if (len == 1) { (range === UInt(0), range === UInt(1), in_0.asUInt() & range, UInt(0)) } else { val mid = len / 2 val lo = range(mid-1, 0) val hi = range(len-1, mid) val (lo_0, lo_1, lo_m, lo_s) = mask_helper(in_0, lo) val (hi_0, hi_1, hi_m, hi_s) = mask_helper(in_0 && lo_0, hi) val out_0 = lo_0 && hi_0 val out_1 = lo_1 && hi_1 val out_m = Cat(hi_m, lo_m) | Fill(len, (in_0 && out_1).asUInt()) val out_s = Mux(out_1, UInt(log2Up(len)), Mux(lo_0, hi_s, lo_s)) (out_0, out_1, out_m, out_s) } } val pending_wmask = acq_wmask & ~done_wmask val put_addr = PriorityEncoder(pending_wmask) val (wmask_0, _, exec_wmask, put_size) = mask_helper(Bool(true), pending_wmask) next_wmask := done_wmask | exec_wmask // Calculate the address, with consideration to put fragments and bursts val addr_block = io.acquire.bits.addr_block val addr_beat = io.acquire.bits.addr_beat val addr_burst = Mux(isReadBurst, addr_beat + burst, addr_beat) val addr_byte = Mux(isPut, put_addr, io.acquire.bits.addr_byte()) val ahbAddr = Cat(addr_block, addr_burst, addr_byte) val ahbSize = Mux(isPut, put_size, Mux(isBurst, UInt(log2Ceil(tlDataBytes)), io.acquire.bits.op_size())) val ahbBurst = MuxLookup(io.acquire.bits.a_type, HBURST_SINGLE, Array( Acquire.getType -> HBURST_SINGLE, Acquire.getBlockType -> burstSize, Acquire.putType -> HBURST_SINGLE, Acquire.putBlockType -> burstSize, Acquire.putAtomicType -> HBURST_SINGLE, Acquire.getPrefetchType -> HBURST_SINGLE, Acquire.putPrefetchType -> HBURST_SINGLE)) val ahbWrite = MuxLookup(io.acquire.bits.a_type, Bool(false), Array( Acquire.getType -> Bool(false), Acquire.getBlockType -> Bool(false), Acquire.putType -> Bool(true), Acquire.putBlockType -> Bool(true), Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array( s_atom_r -> Bool(false), s_atom_idle1 -> Bool(false), // don't care s_atom_w -> Bool(true), s_atom_idle2 -> Bool(true))), // don't care Acquire.getPrefetchType -> Bool(false), // don't care Acquire.putPrefetchType -> Bool(true))) // don't care val ahbExecute = MuxLookup(io.acquire.bits.a_type, Bool(false), Array( Acquire.getType -> Bool(true), Acquire.getBlockType -> Bool(true), Acquire.putType -> !wmask_0, // handle the case of a Put with no bytes! Acquire.putBlockType -> Bool(true), Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array( s_atom_r -> Bool(true), s_atom_idle1 -> Bool(false), s_atom_w -> Bool(true), s_atom_idle2 -> Bool(false))), Acquire.getPrefetchType -> Bool(false), Acquire.putPrefetchType -> Bool(false))) val respondTL = MuxLookup(io.acquire.bits.a_type, Bool(false), Array( Acquire.getType -> Bool(true), Acquire.getBlockType -> Bool(true), Acquire.putType -> last_wmask, Acquire.putBlockType -> Bool(true), Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array( s_atom_r -> Bool(true), // they want the old data s_atom_idle1 -> Bool(false), s_atom_w -> Bool(false), s_atom_idle2 -> Bool(false))), Acquire.getPrefetchType -> Bool(true), Acquire.putPrefetchType -> Bool(true))) io.request.valid := io.acquire.valid io.request.bits.htrans := HTRANS_IDLE // unused/ignored io.request.bits.haddr := ahbAddr io.request.bits.hmastlock := isAtomic && atom_state =/= s_atom_idle2 io.request.bits.hwrite := ahbWrite io.request.bits.hburst := ahbBurst io.request.bits.hsize := ahbSize io.request.bits.hprot := HPROT_DATA | HPROT_PRIVILEGED io.request.bits.hwdata := io.acquire.bits.data io.request.bits.executeAHB := ahbExecute io.request.bits.respondTL := respondTL io.request.bits.latchAtom := isAtomic && atom_state === s_atom_r io.request.bits.firstBurst := burst === firstBurst io.request.bits.finalBurst := burst === finalBurst || !isBurst io.request.bits.cmd := io.acquire.bits.op_code() io.request.bits.is_builtin_type := Bool(true) io.request.bits.g_type := io.acquire.bits.getBuiltInGrantType() io.request.bits.client_xact_id := io.acquire.bits.client_xact_id io.request.bits.addr_beat := addr_burst val debugBurst = Reg(UInt()) debugBurst := addr_burst - burst // We only support built-in TileLink requests assert(!io.acquire.valid || io.acquire.bits.is_builtin_type, "AHB bridge only supports builtin TileLink types") // Ensure alignment of address to size assert(!io.acquire.valid || (ahbAddr & ((UInt(1) << ahbSize) - UInt(1))) === UInt(0), "TileLink operation misaligned") // If this is a putBlock, make sure it moves properly assert(!io.acquire.valid || !isBurst || burst === firstBurst || debugBurst === addr_burst - burst, "TileLink putBlock beats not sequential") // We better not get an incomplete TileLink acquire assert(!io.acquire.valid || isBurst || burst === firstBurst, "TileLink never completed a putBlock") // If we disabled atomic support, we better not see a request assert(!io.acquire.bits.is(Acquire.putAtomicType) || Bool(supportAtomics)) } // AHB stage2: execute AHBRequests class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module with HasHastiParameters with HasTileLinkParameters with HasAddrMapParameters { val io = new Bundle { val request = new DecoupledIO(new AHBRequestIO).flip val grant = new DecoupledIO(new Grant) val ahb = new HastiMasterIO() } // All AHB outputs are registered (they might be IOs) val midBurst = Reg(init = Bool(false)) val htrans = Reg(init = HTRANS_IDLE) val haddr = Reg(UInt()) val hmastlock = Reg(init = Bool(false)) val hwrite = Reg(Bool()) val hburst = Reg(UInt()) val hsize = Reg(init = UInt(0, width = SZ_HSIZE)) val hprot = Reg(UInt()) val hwdata0 = Reg(Bits()) val hwdata1 = Reg(Bits()) val hrdata = Reg(Bits()) io.ahb.htrans := htrans io.ahb.haddr := haddr io.ahb.hmastlock := hmastlock io.ahb.hwrite := hwrite io.ahb.hburst := hburst io.ahb.hsize := hsize io.ahb.hprot := hprot io.ahb.hwdata := hwdata1 // one cycle after the address phase // TileLink response data needed in data phase val respondTL0 = Reg(init = Bool(false)) val respondTL1 = Reg(init = Bool(false)) val latchAtom0 = Reg(init = Bool(false)) val latchAtom1 = Reg(init = Bool(false)) val bubble = Reg(init = Bool(true)) // nothing useful in address phase val cmd = Reg(Bits()) val g_type0 = Reg(UInt()) val g_type1 = Reg(UInt()) val client_xact_id0 = Reg(Bits()) val client_xact_id1 = Reg(Bits()) val addr_beat0 = Reg(UInt()) val addr_beat1 = Reg(UInt()) val grant1 = Reg(new Grant) // It is allowed to progress from Idle/Busy during a wait state val addrReady = io.ahb.hready || bubble val dataReady = io.ahb.hready // Only accept a new AHBRequest if we have enough buffer space in the pad // to accomodate a persistent drop in TileLink's grant.ready io.request.ready := addrReady && io.grant.ready // htrans must be updated even if no request is valid when (addrReady) { when (io.request.fire() && io.request.bits.executeAHB) { midBurst := !io.request.bits.finalBurst when (io.request.bits.firstBurst) { htrans := HTRANS_NONSEQ } .otherwise { htrans := HTRANS_SEQ } } .otherwise { when (midBurst) { htrans := HTRANS_BUSY } .otherwise { htrans := HTRANS_IDLE } } } // Address phase, clear repondTL when we have nothing to do when (addrReady) { when (io.request.fire()) { respondTL0 := io.request.bits.respondTL latchAtom0 := io.request.bits.latchAtom bubble := Bool(false) } .otherwise { respondTL0 := Bool(false) latchAtom0 := Bool(false) bubble := Bool(true) // an atom-injected Idle is not a bubble! } } // Transfer bulk address phase when (io.request.fire()) { haddr := io.request.bits.haddr hmastlock := io.request.bits.hmastlock hwrite := io.request.bits.hwrite hburst := io.request.bits.hburst hsize := io.request.bits.hsize hprot := io.request.bits.hprot hwdata0 := io.request.bits.hwdata cmd := io.request.bits.cmd g_type0 := io.request.bits.g_type client_xact_id0 := io.request.bits.client_xact_id addr_beat0 := io.request.bits.addr_beat } // Execute Atomic ops; unused and optimized away if !supportAtomics val amo_p = p.alterPartial({ case CacheBlockOffsetBits => hastiAddrBits case AmoAluOperandBits => hastiDataBits }) val alu = Module(new AMOALU(rhsIsAligned = false)(amo_p)) alu.io.addr := haddr alu.io.cmd := cmd alu.io.typ := hsize alu.io.rhs := hwdata0 alu.io.lhs := hrdata // Transfer bulk data phase // NOTE: this introduces no bubbles because addrReady is a superset of dataReady when (dataReady) { hwdata1 := Mux(Bool(supportAtomics), alu.io.out, hwdata0) respondTL1 := respondTL0 latchAtom1 := latchAtom0 g_type1 := g_type0 client_xact_id1 := client_xact_id0 addr_beat1 := addr_beat0 } // Latch the read result for an atomic operation when (dataReady && latchAtom1) { hrdata := io.ahb.hrdata } // Only issue TL grant when the slave has provided data io.grant.valid := dataReady && respondTL1 io.grant.bits := Grant( is_builtin_type = Bool(true), g_type = g_type1, client_xact_id = client_xact_id1, manager_xact_id = UInt(0), addr_beat = addr_beat1, data = io.ahb.hrdata) // We cannot support errors from AHB to TileLink assert(!io.ahb.hresp, "AHB hresp error detected and cannot be reported via TileLink") } class AHBBridge(supportAtomics: Boolean = true)(implicit val p: Parameters) extends Module with HasHastiParameters with HasTileLinkParameters with HasAddrMapParameters { val io = new Bundle { val tl = new ClientUncachedTileLinkIO().flip val ahb = new HastiMasterIO() } // Hasti and TileLink widths must agree at this point in the topology require (tlDataBits == hastiDataBits) require (p(PAddrBits) == hastiAddrBits) // AHB does not permit bursts to cross a 1KB boundary require (tlDataBits * tlDataBeats <= 1024*8) // tlDataBytes must be a power of 2 require (1 << log2Ceil(tlDataBytes) == tlDataBytes) // Create the sub-blocks val fsm = Module(new AHBTileLinkIn(supportAtomics)) val bus = Module(new AHBBusMaster(supportAtomics)) val pad = Module(new Queue(new Grant, 4)) fsm.io.acquire <> Queue(io.tl.acquire, 2) // Pipe is also acceptable bus.io.request <> fsm.io.request io.ahb <> bus.io.ahb io.tl.grant <> pad.io.deq // The pad is needed to absorb AHB progress while !grant.ready // We are only 'ready' if the pad has at least 3 cycles of space bus.io.grant.ready := pad.io.count <= UInt(1) pad.io.enq.bits := bus.io.grant.bits pad.io.enq.valid := bus.io.grant.valid }