From eec590c1bfcdafca615bddc98204fc9f83f2e4a8 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 20 Mar 2013 14:11:54 -0700 Subject: [PATCH] Added support for multiple L2 banks. Moved tile IO queueing. --- src/main/scala/RocketChip.scala | 232 +++++++++++++++++++++++--------- src/main/scala/fpga.scala | 37 +++-- 2 files changed, 191 insertions(+), 78 deletions(-) diff --git a/src/main/scala/RocketChip.scala b/src/main/scala/RocketChip.scala index 9d798f3a..60ecb4a1 100644 --- a/src/main/scala/RocketChip.scala +++ b/src/main/scala/RocketChip.scala @@ -2,6 +2,7 @@ package referencechip import Chisel._ import Node._ +import uncore.Constants._ import uncore._ import rocket._ import rocket.Util._ @@ -9,6 +10,122 @@ import ReferenceChipBackend._ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap +object TileLinkHeaderAppender { + def apply[T <: SourcedMessage with HasPhysicalAddress, U <: SourcedMessage with HasMemData](meta: ClientSourcedIO[LogicalNetworkIO[T]], data: ClientSourcedIO[LogicalNetworkIO[U]], clientId: Int, nBanks: Int, bankIdLsb: Int)(implicit conf: UncoreConfiguration) = { + val shim = (new TileLinkHeaderAppenderWithData(clientId, nBanks, bankIdLsb)){meta.bits.payload.clone}{data.bits.payload.clone} + shim.io.meta_in <> meta + shim.io.data_in <> data + (shim.io.meta_out, shim.io.data_out) + } + def apply[T <: SourcedMessage with HasPhysicalAddress](meta: ClientSourcedIO[LogicalNetworkIO[T]], clientId: Int, nBanks: Int, bankIdLsb: Int)(implicit conf: UncoreConfiguration) = { + val shim = (new TileLinkHeaderAppender(clientId, nBanks, bankIdLsb)){meta.bits.payload.clone} + shim.io.meta_in <> meta + shim.io.meta_out + } +} + +abstract class AddressConverter extends Component { + def convertAddrToBank(addr: Bits, n: Int, lsb: Int): UFix = { + require(lsb + log2Up(n) < PADDR_BITS - OFFSET_BITS, {println("Invalid bits for bank multiplexing.")}) + addr(lsb + log2Up(n) - 1, lsb) + } +} + +class TileLinkHeaderAppenderWithData[T <: SourcedMessage with HasPhysicalAddress, U <: SourcedMessage with HasMemData](clientId: Int, nBanks: Int, bankIdLsb: Int)(metadata: => T)(data: => U)(implicit conf: UncoreConfiguration) extends AddressConverter { + implicit val ln = conf.ln + val io = new Bundle { + val meta_in = (new ClientSourcedIO){(new LogicalNetworkIO){ metadata }}.flip + val data_in = (new ClientSourcedIO){(new LogicalNetworkIO){ data }}.flip + val meta_out = (new ClientSourcedIO){(new LogicalNetworkIO){ metadata }} + val data_out = (new ClientSourcedIO){(new LogicalNetworkIO){ data }} + } + + val meta_q = Queue(io.meta_in) + val data_q = Queue(io.data_in) + if(nBanks == 1) { + io.meta_out.bits.payload := meta_q.bits.payload + io.meta_out.bits.header.src := UFix(clientId) + io.meta_out.bits.header.dst := UFix(0) + io.meta_out.valid := meta_q.valid + meta_q.ready := io.meta_out.ready + io.data_out.bits.payload := data_q.bits.payload + io.data_out.bits.header.src := UFix(clientId) + io.data_out.bits.header.dst := UFix(0) + io.data_out.valid := data_q.valid + data_q.ready := io.data_out.ready + } else { + val meta_has_data = conf.co.messageHasData(meta_q.bits.payload) + val addr_q = (new Queue(2, pipe = true, flow = true)){io.meta_in.bits.payload.addr.clone} + val data_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) + val data_cnt_up = data_cnt + UFix(1) + + io.meta_out.bits.payload := meta_q.bits.payload + io.meta_out.bits.header.src := UFix(clientId) + io.meta_out.bits.header.dst := convertAddrToBank(meta_q.bits.payload.addr, nBanks, bankIdLsb) + io.data_out.bits.payload := meta_q.bits.payload + io.data_out.bits.header.src := UFix(clientId) + io.data_out.bits.header.dst := convertAddrToBank(addr_q.io.deq.bits, nBanks, bankIdLsb) + addr_q.io.enq.bits := meta_q.bits.payload.addr + + io.meta_out.valid := meta_q.valid && addr_q.io.enq.ready + meta_q.ready := io.meta_out.ready && addr_q.io.enq.ready + io.data_out.valid := data_q.valid && addr_q.io.deq.valid + data_q.ready := io.data_out.ready && addr_q.io.deq.valid + addr_q.io.enq.valid := meta_q.valid && io.meta_out.ready && meta_has_data + addr_q.io.deq.ready := Bool(false) + + when(data_q.valid && data_q.ready) { + data_cnt := data_cnt_up + when(data_cnt_up === UFix(0)) { + addr_q.io.deq.ready := Bool(true) + } + } + } +} + +class TileLinkHeaderAppender[T <: SourcedMessage with HasPhysicalAddress](clientId: Int, nBanks: Int, bankIdLsb: Int)(metadata: => T)(implicit conf: UncoreConfiguration) extends AddressConverter { + implicit val ln = conf.ln + val io = new Bundle { + val meta_in = (new ClientSourcedIO){(new LogicalNetworkIO){ metadata }}.flip + val meta_out = (new ClientSourcedIO){(new LogicalNetworkIO){ metadata }} + } + val meta_q = Queue(io.meta_in) + io.meta_out.bits.payload := meta_q.bits.payload + io.meta_out.bits.header.src := UFix(clientId) + io.meta_out.valid := meta_q.valid + meta_q.ready := io.meta_out.ready + if(nBanks == 1) { + io.meta_out.bits.header.dst := UFix(0) + } else { + io.meta_out.bits.header.dst := convertAddrToBank(meta_q.bits.payload.addr, nBanks, bankIdLsb) + } +} + +class MemIOUncachedTileLinkIOConverter(qDepth: Int)(implicit conf: UncoreConfiguration) extends Component { + implicit val ln = conf.ln + val io = new Bundle { + val uncached = new UncachedTileLinkIO().flip + val mem = new ioMem + } + val mem_cmd_q = (new Queue(qDepth)){new MemReqCmd} + val mem_data_q = (new Queue(qDepth)){new MemData} + mem_cmd_q.io.enq.valid := io.uncached.acquire.valid + io.uncached.acquire.ready := mem_cmd_q.io.enq.ready + mem_cmd_q.io.enq.bits.rw := conf.co.needsOuterWrite(io.uncached.acquire.bits.payload.a_type, UFix(0)) + mem_cmd_q.io.enq.bits.tag := io.uncached.acquire.bits.payload.client_xact_id + mem_cmd_q.io.enq.bits.addr := io.uncached.acquire.bits.payload.addr + mem_data_q.io.enq.valid := io.uncached.acquire_data.valid + io.uncached.acquire_data.ready := mem_data_q.io.enq.ready + mem_data_q.io.enq.bits.data := io.uncached.acquire_data.bits.payload.data + io.uncached.grant.valid := io.mem.resp.valid + io.mem.resp.ready := io.uncached.grant.ready + io.uncached.grant.bits.payload.data := io.mem.resp.bits.data + io.uncached.grant.bits.payload.client_xact_id := io.mem.resp.bits.tag + io.uncached.grant.bits.payload.master_xact_id := UFix(0) // DNC + io.uncached.grant.bits.payload.g_type := UFix(0) // DNC + io.mem.req_cmd <> mem_cmd_q.io.deq + io.mem.req_data <> mem_data_q.io.deq +} object TileToCrossbarShim { def apply[T <: Data](logIO: ClientSourcedIO[LogicalNetworkIO[T]])(implicit lconf: LogicalNetworkConfiguration, pconf: PhysicalNetworkConfiguration) = { @@ -197,7 +314,7 @@ class ReferenceChipBackend extends VerilogBackend transforms += ((c: Component) => addMemPin(c)) } -class OuterMemorySystem(htif_width: Int, tileEndpoints: Seq[ClientCoherenceAgent])(implicit conf: CoherenceHubConfiguration) extends Component +class OuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenceAgent])(implicit conf: UncoreConfiguration) extends Component { implicit val lnconf = conf.ln val io = new Bundle { @@ -214,35 +331,31 @@ class OuterMemorySystem(htif_width: Int, tileEndpoints: Seq[ClientCoherenceAgent val lnWithHtifConf = conf.ln.copy(nEndpoints = conf.ln.nEndpoints+1, idBits = log2Up(conf.ln.nEndpoints+1)+1, nClients = conf.ln.nClients+1) - val chWithHtifConf = conf.copy(ln = lnWithHtifConf) - require(tileEndpoints.length == lnWithHtifConf.nClients) - //val hub = new CoherenceHubBroadcast()(chWithHtifConf) + val ucWithHtifConf = conf.copy(ln = lnWithHtifConf) + require(clientEndpoints.length == lnWithHtifConf.nClients) + val masterEndpoints = (0 until lnWithHtifConf.nMasters).map(new L2CoherenceAgent(_)(ucWithHtifConf)) + val llc_tag_leaf = Mem(1024, seqRead = true) { Bits(width = 72) } val llc_data_leaf = Mem(4096, seqRead = true) { Bits(width = 64) } val llc = new DRAMSideLLC(512, 8, 4, llc_tag_leaf, llc_data_leaf) //val llc = new DRAMSideLLCNull(NGLOBAL_XACTS, REFILL_CYCLES) val mem_serdes = new MemSerdes(htif_width) - //val hub = new CoherenceHubBroadcast()(chWithHtifConf) - //val adapter = new CoherenceHubAdapter()(lnWithHtifConf) - val hub = new L2CoherenceAgent()(chWithHtifConf) - val net = new ReferenceChipCrossbarNetwork(List(hub)++tileEndpoints)(lnWithHtifConf) - //net.io(0) <> adapter.io.net - //hub.io.tiles <> adapter.io.hub - hub.io.network <> net.io(0) + val net = new ReferenceChipCrossbarNetwork(masterEndpoints++clientEndpoints)(lnWithHtifConf) + net.io zip (masterEndpoints.map(_.io.client) ++ io.tiles :+ io.htif) map { case (net, end) => net <> end } + masterEndpoints.map{ _.io.incoherent zip (io.incoherent ++ List(Bool(true))) map { case (m, c) => m := c } } - for (i <- 1 to conf.ln.nClients) { - net.io(i) <> io.tiles(i-1) - //hub.io.tiles(i-1) <> io.tiles(i-1) - hub.io.incoherent(i-1) := io.incoherent(i-1) + val conv = new MemIOUncachedTileLinkIOConverter(2)(ucWithHtifConf) + if(lnWithHtifConf.nMasters > 1) { + val arb = new UncachedTileLinkIOArbiter(lnWithHtifConf.nMasters)(lnWithHtifConf) + arb.io.in zip masterEndpoints.map(_.io.master) map { case (arb, cache) => arb <> cache } + conv.io.uncached <> arb.io.out + } else { + conv.io.uncached <> masterEndpoints.head.io.master } - net.io(conf.ln.nClients+1) <> io.htif - //hub.io.tiles(conf.ln.nClients) <> io.htif - hub.io.incoherent(conf.ln.nClients) := Bool(true) - - llc.io.cpu.req_cmd <> Queue(hub.io.mem.req_cmd) - llc.io.cpu.req_data <> Queue(hub.io.mem.req_data, REFILL_CYCLES) - hub.io.mem.resp <> llc.io.cpu.resp + llc.io.cpu.req_cmd <> Queue(conv.io.mem.req_cmd) + llc.io.cpu.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES) + conv.io.mem.resp <> llc.io.cpu.resp // mux between main and backup memory ports val mem_cmdq = (new Queue(2)) { new MemReqCmd } @@ -267,7 +380,7 @@ class OuterMemorySystem(htif_width: Int, tileEndpoints: Seq[ClientCoherenceAgent io.mem_backup <> mem_serdes.io.narrow } -class Uncore(htif_width: Int, tileEndpoints: Seq[ClientCoherenceAgent])(implicit conf: CoherenceHubConfiguration) extends Component +class Uncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit conf: UncoreConfiguration) extends Component { implicit val lnconf = conf.ln val io = new Bundle { @@ -280,17 +393,38 @@ class Uncore(htif_width: Int, tileEndpoints: Seq[ClientCoherenceAgent])(implicit val htif = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients) }.flip val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput } + val nBanks = 1 + val bankIdLsb = 5 val htif = new rocketHTIF(htif_width) + val outmemsys = new OuterMemorySystem(htif_width, tileList :+ htif) htif.io.cpu <> io.htif - - val outmemsys = new OuterMemorySystem(htif_width, tileEndpoints++List(htif)) - outmemsys.io.tiles <> io.tiles - outmemsys.io.htif <> htif.io.mem outmemsys.io.incoherent <> io.incoherent io.mem <> outmemsys.io.mem outmemsys.io.mem_backup_en <> io.mem_backup_en + // Add networking headers and endpoint queues + (outmemsys.io.tiles :+ outmemsys.io.htif).zip(io.tiles :+ htif.io.mem).zipWithIndex.map { + case ((outer, client), i) => + val (acq_w_header, acq_data_w_header) = TileLinkHeaderAppender(client.acquire, client.acquire_data, i, nBanks, bankIdLsb) + outer.acquire <> acq_w_header + outer.acquire_data <> acq_data_w_header + + val (rel_w_header, rel_data_w_header) = TileLinkHeaderAppender(client.release, client.release_data, i, nBanks, bankIdLsb) + outer.release <> rel_w_header + outer.release_data <> rel_data_w_header + + val grant_ack_q = Queue(client.grant_ack) + outer.grant_ack.valid := grant_ack_q.valid + outer.grant_ack.bits := grant_ack_q.bits + outer.grant_ack.bits.header.src := UFix(i) + grant_ack_q.ready := outer.grant_ack.ready + + client.abort <> Queue(outer.abort) + client.grant <> Queue(outer.grant, 1, pipe = true) + client.probe <> Queue(outer.probe) + } + // pad out the HTIF using a divided clock val hio = (new SlowIO(512)) { Bits(width = htif_width+1) } hio.io.set_divisor.valid := htif.io.scr.wen && htif.io.scr.waddr === 63 @@ -332,6 +466,7 @@ class TopIO(htif_width: Int) extends Bundle { object DummyTopLevelConstants extends _root_.uncore.constants.CoherenceConfigConstants { val NTILES = 2 + val NBANKS = 2 val HTIF_WIDTH = 16 val ENABLE_SHARING = true val ENABLE_CLEAN_EXCLUSIVE = true @@ -354,8 +489,8 @@ class Top extends Component { else new MICoherence } - implicit val lnConf = LogicalNetworkConfiguration(NTILES+1, log2Up(NTILES)+1, 1, NTILES) - implicit val chConf = CoherenceHubConfiguration(co, lnConf) + implicit val lnConf = LogicalNetworkConfiguration(NTILES+NBANKS, log2Up(NTILES)+1, NBANKS, NTILES) + implicit val uConf = UncoreConfiguration(co, lnConf) val io = new TopIO(HTIF_WIDTH) @@ -376,49 +511,14 @@ class Top extends Component { resetSigs(i) := hl.reset val tile = tileList(i) - + tile.io.tilelink <> tl + il := hl.reset tile.io.host.reset := Reg(Reg(hl.reset)) tile.io.host.pcr_req <> Queue(hl.pcr_req) hl.pcr_rep <> Queue(tile.io.host.pcr_rep) hl.ipi_req <> Queue(tile.io.host.ipi_req) tile.io.host.ipi_rep <> Queue(hl.ipi_rep) error_mode = error_mode || Reg(tile.io.host.debug.error_mode) - - val x_init_q = Queue(tile.io.tilelink.acquire) - tl.acquire.valid := x_init_q.valid - tl.acquire.bits.payload := x_init_q.bits.payload - tl.acquire.bits.header.src := UFix(i) - tl.acquire.bits.header.dst := UFix(0) - x_init_q.ready := tl.acquire.ready - val x_init_data_q = Queue(tile.io.tilelink.acquire_data) - tl.acquire_data.valid := x_init_data_q.valid - tl.acquire_data.bits.payload := x_init_data_q.bits.payload - tl.acquire_data.bits.header.src := UFix(i) - tl.acquire_data.bits.header.dst := UFix(0) - x_init_data_q.ready := tl.acquire_data.ready - val x_finish_q = Queue(tile.io.tilelink.grant_ack) - tl.grant_ack.valid := x_finish_q.valid - tl.grant_ack.bits.payload := x_finish_q.bits.payload - tl.grant_ack.bits.header.src := UFix(i) - tl.grant_ack.bits.header.dst := UFix(0) - x_finish_q.ready := tl.grant_ack.ready - val p_rep_q = Queue(tile.io.tilelink.release, 1) - tl.release.valid := p_rep_q.valid - tl.release.bits.payload := p_rep_q.bits.payload - tl.release.bits.header.src := UFix(i) - tl.release.bits.header.dst := UFix(0) - p_rep_q.ready := tl.release.ready - val p_rep_data_q = Queue(tile.io.tilelink.release_data) - tl.release_data.valid := p_rep_data_q.valid - tl.release_data.bits.payload := p_rep_data_q.bits.payload - tl.release_data.bits.header.src := UFix(i) - tl.release_data.bits.header.dst := UFix(0) - p_rep_data_q.ready := tl.release_data.ready - - tile.io.tilelink.grant <> Queue(tl.grant, 1, pipe = true) - tile.io.tilelink.probe <> Queue(tl.probe) - il := hl.reset - } io.host <> uncore.io.host diff --git a/src/main/scala/fpga.scala b/src/main/scala/fpga.scala index fb880822..922650c7 100644 --- a/src/main/scala/fpga.scala +++ b/src/main/scala/fpga.scala @@ -6,7 +6,7 @@ import uncore._ import rocket._ import rocket.Constants._ -class FPGAUncore(htif_width: Int)(implicit conf: CoherenceHubConfiguration) extends Component +class FPGAUncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit conf: UncoreConfiguration) extends Component { val io = new Bundle { val host = new HostIO(htif_width) @@ -20,16 +20,29 @@ class FPGAUncore(htif_width: Int)(implicit conf: CoherenceHubConfiguration) exte htif.io.cpu <> io.htif io.host <> htif.io.host - val lnWithHtif = conf.ln.copy(nEndpoints = conf.ln.nEndpoints+1, nClients = conf.ln.nClients+1) - val hub = new CoherenceHubBroadcast()(conf.copy(ln = lnWithHtif)) - for (i <- 0 until conf.ln.nClients) - hub.io.tiles(i) <> io.tiles(i) - hub.io.tiles(conf.ln.nClients) <> htif.io.mem - hub.io.incoherent <> io.incoherent + val lnWithHtifConf = conf.ln.copy(nEndpoints = conf.ln.nEndpoints+1, + idBits = log2Up(conf.ln.nEndpoints+1)+1, + nClients = conf.ln.nClients+1) + val ucWithHtifConf = conf.copy(ln = lnWithHtifConf) + val clientEndpoints = tileList :+ htif + val masterEndpoints = List.fill(lnWithHtifConf.nMasters)(new L2CoherenceAgent(0)(ucWithHtifConf)) - io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) - io.mem.req_data <> Queue(hub.io.mem.req_data, REFILL_CYCLES*2) - hub.io.mem.resp <> Queue(io.mem.resp, REFILL_CYCLES*2) + val net = new ReferenceChipCrossbarNetwork(masterEndpoints++clientEndpoints)(lnWithHtifConf) + net.io zip (masterEndpoints.map(_.io.client) ++ io.tiles :+ io.htif) map { case (net, end) => net <> end } + masterEndpoints.map{ _.io.incoherent zip (io.incoherent ++ List(Bool(true))) map { case (m, c) => m := c } } + + val conv = new MemIOUncachedTileLinkIOConverter(2)(ucWithHtifConf) + if(lnWithHtifConf.nMasters > 1) { + val arb = new UncachedTileLinkIOArbiter(lnWithHtifConf.nMasters)(lnWithHtifConf) + arb.io.in zip masterEndpoints.map(_.io.master) map { case (arb, cache) => arb <> cache } + conv.io.uncached <> arb.io.out + } else { + conv.io.uncached <> masterEndpoints.head.io.master + } + + io.mem.req_cmd <> Queue(conv.io.mem.req_cmd) + io.mem.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES*2) + conv.io.mem.resp <> Queue(io.mem.resp, REFILL_CYCLES*2) } class FPGATop extends Component { @@ -41,8 +54,7 @@ class FPGATop extends Component { } val co = new MESICoherence implicit val lnConf = LogicalNetworkConfiguration(4, 3, 1, 3) - implicit val uconf = CoherenceHubConfiguration(co, lnConf) - val uncore = new FPGAUncore(htif_width = htif_width) + implicit val uconf = UncoreConfiguration(co, lnConf) val resetSigs = Vec(uconf.ln.nClients){ Bool() } val ic = ICacheConfig(64, 1, co, ntlb = 4, nbtb = 4) @@ -52,6 +64,7 @@ class FPGATop extends Component { fastMulDiv = false, fpu = false, vec = false) val tileList = (0 until uconf.ln.nClients).map(r => new Tile(resetSignal = resetSigs(r))(rc)) + val uncore = new FPGAUncore(htif_width = htif_width, tileList = tileList) io.debug.error_mode := Bool(false) for (i <- 0 until uconf.ln.nClients) {