tilelink2 AtomicAutomata: prototype flow control complete
This commit is contained in:
		
							
								
								
									
										249
									
								
								src/main/scala/uncore/tilelink2/AtomicAutomata.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								src/main/scala/uncore/tilelink2/AtomicAutomata.scala
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,249 @@ | ||||
| // See LICENSE for license details. | ||||
|  | ||||
| package uncore.tilelink2 | ||||
|  | ||||
| import Chisel._ | ||||
| import chisel3.internal.sourceinfo.SourceInfo | ||||
| import scala.math.{min,max} | ||||
|  | ||||
| // Ensures that all downstream RW managers support Atomic operations (Logical when `logical`, Arithmetic when `arithmetic`); the CAM tracking intercepted atomics has at most `concurrency` entries. | ||||
| // If !passthrough, intercept all Atomics. Otherwise, only intercept those unsupported downstream. An intercepted Atomic goes downstream as a Get, then as a Put built from the captured request. | ||||
| class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, concurrency: Int = 1, passthrough: Boolean = true) extends LazyModule | ||||
| { | ||||
|   require (concurrency >= 1) | ||||
|  | ||||
|   val node = TLAdapterNode( | ||||
|     clientFn  = { case Seq(cp) => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) }, | ||||
|     managerFn = { case Seq(mp) => mp.copy(managers = mp.managers.map { m => | ||||
|       val ourSupport = TransferSizes(1, mp.beatBytes) | ||||
|       def widen(x: TransferSizes) = if (passthrough && x.min <= 2*mp.beatBytes) TransferSizes(1, max(mp.beatBytes, x.max)) else ourSupport | ||||
|       val canDoit = m.supportsPutFull.contains(ourSupport) && m.supportsGet.contains(ourSupport) | ||||
|       // Blow up if a manager supports both PutFull and Get but not over the full 1..beatBytes range: its R|W are too inflexible for us to add Atomics | ||||
|       require (!m.supportsPutFull || !m.supportsGet || canDoit) | ||||
|       m.copy( | ||||
|         supportsArithmetic = if (!arithmetic || !canDoit) m.supportsArithmetic else widen(m.supportsArithmetic), | ||||
|         supportsLogical    = if (!logical    || !canDoit) m.supportsLogical    else widen(m.supportsLogical)) | ||||
|     })}) | ||||
|  | ||||
|   lazy val module = new LazyModuleImp(this) { | ||||
|     val io = new Bundle { | ||||
|       val in  = node.bundleIn | ||||
|       val out = node.bundleOut | ||||
|     } | ||||
|  | ||||
|     val in  = io.in(0) | ||||
|     val out = io.out(0) | ||||
|     val edgeIn  = node.edgesIn(0) | ||||
|     val edgeOut = node.edgesOut(0) | ||||
|     val managers = edgeOut.manager.managers | ||||
|  | ||||
|     // To which managers are we adding atomic support? Those with full-range PutFull and Get that lack a requested atomic class, or every such manager when !passthrough | ||||
|     val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes) | ||||
|     val managersNeedingHelp = managers.filter { m => | ||||
|       m.supportsPutFull.contains(ourSupport) && | ||||
|       m.supportsGet.contains(ourSupport) && | ||||
|       ((logical    && !m.supportsLogical   .contains(ourSupport)) || | ||||
|        (arithmetic && !m.supportsArithmetic.contains(ourSupport)) || | ||||
|        !passthrough) // we will do atomics for everyone we can | ||||
|     } | ||||
|     // We cannot add atomics to a non-FIFO manager | ||||
|     managersNeedingHelp foreach { m => require (m.fifoId.isDefined) } | ||||
|     // We need to preserve FIFO semantics across FIFO domains, not managers | ||||
|     // Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43 | ||||
|     // If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef | ||||
|     // Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer) | ||||
|     val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct | ||||
|     // Don't overprovision the CAM: cap it at the number of FIFO domains needing help | ||||
|     val camSize = min(domainsNeedingHelp.size, concurrency) | ||||
|     // Compact the fifoIds to only those we care about (a manager without a fifoId, or in a domain not needing help, maps to 0) | ||||
|     val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0))) | ||||
|  | ||||
|     // CAM entry state machine (an entry advances FREE => GET => AMO => ACK => FREE) | ||||
|     val FREE = UInt(0) // unused                     waiting on Atomic from A | ||||
|     val GET  = UInt(3) // Get sent down A            waiting on AccessAckData from D | ||||
|     val AMO  = UInt(2) // AccessAckData captured     waiting for A availability to send the Put | ||||
|     val ACK  = UInt(1) // Put sent down A            waiting for AccessAck from D | ||||
|  | ||||
|     class CAMEntry extends Bundle { | ||||
|       val state = UInt(width = 2) | ||||
|       val fifoId = UInt(width = log2Up(domainsNeedingHelp.size)) | ||||
|       val bits  = new TLBundleA(in.a.bits.params) | ||||
|     } | ||||
|  | ||||
|     def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) = | ||||
|       if (!passthrough) Bool(false) else | ||||
|       if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else | ||||
|       Mux1H(select, x.map(_.containsLg(lgSize)))  | ||||
|  | ||||
|     // Do we need to do anything at all? (camSize is 0 only when no FIFO domain needs help; then A and D are wired straight through) | ||||
|     if (camSize > 0) { | ||||
|       class CAM_S extends Bundle { | ||||
|         val state = UInt(width = 2) | ||||
|       } | ||||
|       class CAM_A extends Bundle { | ||||
|         val bits    = new TLBundleA(out.a.bits.params) | ||||
|         val fifoId  = UInt(width = log2Up(domainsNeedingHelp.size)) | ||||
|       } | ||||
|       class CAM_D extends Bundle { | ||||
|         val data = UInt(width = out.a.bits.params.dataBits) | ||||
|       } | ||||
|  | ||||
|       val initval = Wire(new CAM_S) | ||||
|       initval.state := FREE | ||||
|       val cam_s = RegInit(Vec.fill(camSize)(initval)) | ||||
|       val cam_a = Reg(Vec(camSize, new CAM_A)) | ||||
|       val cam_d = Reg(Vec(camSize, new CAM_D)) | ||||
|  | ||||
|       val cam_free   = cam_s.map(_.state === FREE) | ||||
|       val cam_amo    = cam_s.map(_.state === AMO) | ||||
|       val cam_abusy  = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked | ||||
|       val cam_dmatch = cam_s.map(e => e.state === GET || e.state === ACK) // D should inspect these entries | ||||
|  | ||||
|       // Can the manager already handle this message? (always false for Atomics when !passthrough; non-Atomic opcodes count as supported) | ||||
|       val a_size = edgeIn.size(in.a.bits) | ||||
|       val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits)) | ||||
|       val a_canLogical    = helper(a_select, managers.map(_.supportsLogical),    a_size) | ||||
|       val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size) | ||||
|       val a_isLogical    = in.a.bits.opcode === TLMessages.LogicalData | ||||
|       val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData | ||||
|       val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true))) | ||||
|  | ||||
|       // Must we do a Put? (yes if any entry is in the AMO state; the lowest-indexed such entry is selected) | ||||
|       val a_cam_any_put = cam_amo.reduce(_ || _) | ||||
|       val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init | ||||
|       val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b } | ||||
|       val a_cam_adata = PriorityMux(cam_amo, cam_a.map(_.bits)) | ||||
|       val a_cam_ddata = PriorityMux(cam_amo, cam_d.map(_.data)) | ||||
|  | ||||
|       // Does the A request conflict with an inflight AMO? (same compacted fifoId as an entry in the GET or AMO state) | ||||
|       val a_fifoId  = Mux1H(a_select, camFifoIds) | ||||
|       val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_) | ||||
|  | ||||
|       // (Where) are we allocating in the CAM? (the lowest-indexed FREE entry) | ||||
|       val a_cam_any_free = cam_free.reduce(_ || _) | ||||
|       val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init | ||||
|       val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b } | ||||
|  | ||||
|       // !!! perform the AMO op (prototype: always adds the captured request data to the data fetched by the Get, whatever the opcode/param) | ||||
|       val amo_data = a_cam_adata.data + a_cam_ddata | ||||
|  | ||||
|       // Potentially mutate the message from inner (an unsupported Atomic is sent down as a Get and needs a free CAM entry) | ||||
|       val source_i = Wire(in.a) | ||||
|       val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free) | ||||
|       in.a.ready := source_i.ready && a_allow | ||||
|       source_i.valid := in.a.valid && a_allow | ||||
|       source_i.bits  := in.a.bits | ||||
|       when (!a_isSupported) { // minimal mux difference | ||||
|         source_i.bits.opcode := TLMessages.Get | ||||
|         source_i.bits.param  := UInt(0) | ||||
|       } | ||||
|  | ||||
|       // Potentially take the message from the CAM (the Put that writes back amo_data for the selected AMO entry) | ||||
|       val source_c = Wire(in.a) | ||||
|       source_c.valid := a_cam_any_put | ||||
|       source_c.bits := edgeOut.Put(a_cam_adata.source, edgeIn.address(a_cam_adata), a_cam_adata.size, amo_data)._2 | ||||
|  | ||||
|       // Finishing an AMO from the CAM has highest priority | ||||
|       TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(1), source_c), (edgeOut.numBeats(in.a.bits), source_i)) | ||||
|  | ||||
|       // Capture the A state into the CAM (fifoId and the original request bits; the entry moves to GET) | ||||
|       when (source_i.fire() && !a_isSupported) { | ||||
|         (a_cam_sel_free zip cam_a) foreach { case (en, r) => | ||||
|           when (en) { | ||||
|             r.fifoId := a_fifoId | ||||
|             r.bits   := in.a.bits | ||||
|           } | ||||
|         } | ||||
|         (a_cam_sel_free zip cam_s) foreach { case (en, r) => | ||||
|           when (en) { | ||||
|             r.state := GET | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       // Advance the put state (the selected AMO entry moves to ACK once its Put is accepted) | ||||
|       when (source_c.fire()) { | ||||
|         (a_cam_sel_put zip cam_s) foreach { case (en, r) => | ||||
|           when (en) { | ||||
|             r.state := ACK | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       // We need to deal with a potential D response in the same cycle as the A request (bypass: target the entry being allocated instead of a matching one) | ||||
|       val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source) | ||||
|       val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b } | ||||
|       val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data)) | ||||
|       val d_cam_sel_bypass = out.d.bits.source === in.a.bits.source && in.a.valid && out.d.valid && !a_isSupported | ||||
|       val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) } | ||||
|       val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _) | ||||
|       val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData | ||||
|       val d_ack  = out.d.bits.opcode === TLMessages.AccessAck | ||||
|  | ||||
|       when (out.d.fire()) { | ||||
|         (d_cam_sel zip cam_d) foreach { case (en, r) => | ||||
|           when (en && d_ackd) { | ||||
|             r.data := out.d.bits.data | ||||
|           } | ||||
|         } | ||||
|         (d_cam_sel zip cam_s) foreach { case (en, r) => | ||||
|           when (en) { | ||||
|             // Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle | ||||
|             r.state := Mux(d_ackd, AMO, FREE) | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       val d_drop = d_ackd && d_cam_sel_any | ||||
|       val d_replace = d_ack && d_cam_sel_match.reduce(_ || _) | ||||
|  | ||||
|       in.d.valid := out.d.valid && !d_drop | ||||
|       out.d.ready := in.d.ready || d_drop | ||||
|  | ||||
|       in.d.bits := out.d.bits | ||||
|       when (d_replace) { // minimal muxes: the Put's AccessAck goes up as AccessAckData carrying the data captured from the Get | ||||
|         in.d.bits.opcode := TLMessages.AccessAckData | ||||
|         in.d.bits.data := d_cam_data | ||||
|       } | ||||
|     } else { | ||||
|       out.a.valid := in.a.valid | ||||
|       in.a.ready := out.a.ready | ||||
|       out.a.bits := in.a.bits | ||||
|  | ||||
|       in.d.valid := out.d.valid | ||||
|       out.d.ready := in.d.ready | ||||
|       in.d.bits := out.d.bits | ||||
|     } | ||||
|  | ||||
|     if (edgeOut.manager.anySupportAcquire && edgeIn.client.anySupportProbe) { | ||||
|       in.b.valid := out.b.valid | ||||
|       out.b.ready := in.b.ready | ||||
|       in.b.bits := out.b.bits | ||||
|  | ||||
|       out.c.valid := in.c.valid | ||||
|       in.c.ready := out.c.ready | ||||
|       out.c.bits := in.c.bits | ||||
|  | ||||
|       out.e.valid := in.e.valid | ||||
|       in.e.ready := out.e.ready | ||||
|       out.e.bits := in.e.bits | ||||
|     } else { | ||||
|       in.b.valid := Bool(false) | ||||
|       in.c.ready := Bool(true) | ||||
|       in.e.ready := Bool(true) | ||||
|       out.b.ready := Bool(true) | ||||
|       out.c.valid := Bool(false) | ||||
|       out.e.valid := Bool(false) | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| object TLAtomicAutomata | ||||
| { | ||||
|   // Builds a TLAtomicAutomata, connects its node to the TL source node x, and returns that node. | ||||
|   // Usage: y.node := TLAtomicAutomata()(x.node) | ||||
|   def apply(logical: Boolean = true, arithmetic: Boolean = true, concurrency: Int = 1, passthrough: Boolean = true)(x: TLBaseNode)(implicit sourceInfo: SourceInfo): TLBaseNode = { | ||||
|     val automata = LazyModule(new TLAtomicAutomata(logical = logical, arithmetic = arithmetic, concurrency = concurrency, passthrough = passthrough)) | ||||
|     val adapterNode = automata.node | ||||
|     adapterNode := x | ||||
|     adapterNode | ||||
|   } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user