diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 333397af..9e27f6ed 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -2,8 +2,75 @@ package rocket import Chisel._ import Node._ +import Constants._ import uncore._ +class ioHellaCacheArbiter(n: Int) extends Bundle +{ + val requestor = Vec(n) { new ioHellaCache() }.flip + val mem = new ioHellaCache +} + +class rocketHellaCacheArbiter(n: Int) extends Component +{ + val io = new ioHellaCacheArbiter(n) + require(DCACHE_TAG_BITS >= log2Up(n) + CPU_TAG_BITS) + + var req_val = Bool(false) + var req_rdy = io.mem.req.ready + for (i <- 0 until n) + { + io.requestor(i).req.ready := req_rdy + req_val = req_val || io.requestor(i).req.valid + req_rdy = req_rdy && !io.requestor(i).req.valid + } + + var req_cmd = io.requestor(n-1).req.bits.cmd + var req_type = io.requestor(n-1).req.bits.typ + var req_idx = io.requestor(n-1).req.bits.idx + var req_ppn = io.requestor(n-1).req.bits.ppn + var req_data = io.requestor(n-1).req.bits.data + var req_kill = io.requestor(n-1).req.bits.kill + var req_tag = io.requestor(n-1).req.bits.tag + for (i <- n-1 to 0 by -1) + { + val r = io.requestor(i).req + req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) + req_type = Mux(r.valid, r.bits.typ, req_type) + req_idx = Mux(r.valid, r.bits.idx, req_idx) + req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) + req_data = Mux(Reg(r.valid), r.bits.data, req_data) + req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) + req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) + } + + io.mem.req.valid := req_val + io.mem.req.bits.cmd := req_cmd + io.mem.req.bits.typ := req_type + io.mem.req.bits.idx := req_idx + io.mem.req.bits.ppn := req_ppn + io.mem.req.bits.data := req_data + io.mem.req.bits.kill := req_kill + io.mem.req.bits.tag := req_tag + + for (i <- 0 until n) + { + val r = io.requestor(i).resp + val x = io.requestor(i).xcpt + val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) + x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) + x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) + r.valid := io.mem.resp.valid && tag_hit + r.bits.miss := io.mem.resp.bits.miss && tag_hit + r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) + r.bits.replay := io.mem.resp.bits.replay && tag_hit + r.bits.data := io.mem.resp.bits.data + r.bits.data_subword := io.mem.resp.bits.data_subword + r.bits.typ := io.mem.resp.bits.typ + r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) + } +} + class ioUncachedRequestor extends Bundle { val xact_init = (new FIFOIO) { new TransactionInit } val xact_abort = (new FIFOIO) { new TransactionAbort }.flip diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1e3d0578..9ec91845 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,7 +5,7 @@ import Chisel._ import scala.math._ abstract trait MulticoreConstants { - val NTILES: Int = 1 + val NTILES: Int val TILE_ID_BITS = log2Up(NTILES)+1 } @@ -19,19 +19,46 @@ trait UncoreConstants { val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) } -trait HTIFConstants { - val HTIF_WIDTH = 16 - val MEM_BACKUP_WIDTH = HTIF_WIDTH +trait TileLinkTypeConstants { + val X_INIT_TYPE_MAX_BITS = 2 + val X_REP_TYPE_MAX_BITS = 3 + val P_REQ_TYPE_MAX_BITS = 2 + val P_REP_TYPE_MAX_BITS = 3 } -abstract trait TileConfigConstants extends UncoreConstants with MulticoreConstants { - val HAVE_RVC: Boolean - val HAVE_FPU: Boolean - val HAVE_VEC: Boolean - def FPU_N = UFix(0, 1) - def FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N - def VEC_N = UFix(0, 1); - def VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N +trait TileLinkSizeConstants extends + RocketDcacheConstants with + TileLinkTypeConstants +{ + val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 + val X_INIT_WRITE_MASK_BITS = OFFSET_BITS + val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) + val X_INIT_ATOMIC_OP_BITS = 4 +} + +trait HTIFConstants { + val HTIF_WIDTH = 16 +} + +trait MemoryInterfaceConstants extends + HTIFConstants with + UncoreConstants with + TileLinkSizeConstants +{ + val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) + val MEM_DATA_BITS = 128 + val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS + val MEM_BACKUP_WIDTH = HTIF_WIDTH +} + +abstract trait TileConfigConstants { + def HAVE_RVC: Boolean + def HAVE_FPU: Boolean + def HAVE_VEC: Boolean + val FPU_N = UFix(0, 1) + val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N + val VEC_N = UFix(0, 1); + val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N } trait ScalarOpConstants { @@ -202,39 +229,21 @@ trait AddressConstants { val PERM_BITS = 6; } -abstract trait RocketDcacheConstants extends TileConfigConstants with AddressConstants { - val DCACHE_PORTS = 3 +abstract trait RocketDcacheConstants extends ArbiterConstants with AddressConstants { val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = log2Up(DCACHE_PORTS) + CPU_TAG_BITS - val OFFSET_BITS = 6; // log2(cache line size in bytes) + val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses val NRPQ = 16; // number of secondary misses val NSDQ = 17; // number of secondary stores/AMOs - val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) + val OFFSET_BITS = 6; // log2(cache line size in bytes) val IDX_BITS = 7; val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; val NWAYS = 4 require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); } -trait TileLinkSizeConstants extends RocketDcacheConstants { - val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 - val X_INIT_TYPE_MAX_BITS = 2 - val X_INIT_WRITE_MASK_BITS = OFFSET_BITS - val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) - val X_INIT_ATOMIC_OP_BITS = 4 - val X_REP_TYPE_MAX_BITS = 3 - val P_REQ_TYPE_MAX_BITS = 2 - val P_REP_TYPE_MAX_BITS = 3 -} - -trait MemoryInterfaceConstants extends UncoreConstants with TileLinkSizeConstants { - val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) - val MEM_DATA_BITS = 128 - val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS -} - trait TLBConstants { val DTLB_ENTRIES = 16 val ITLB_ENTRIES = 8; @@ -266,12 +275,19 @@ trait VectorOpConstants { val VIMM2_X = UFix(0, 1) } -trait ArbiterConstants { +abstract trait ArbiterConstants extends TileConfigConstants { + val DTLB_PORTS = 3 val DTLB_CPU = 0 val DTLB_VEC = 1 val DTLB_VPF = 2 - val DMEM_CPU = 0 - val DMEM_PTW = 1 - val DMEM_VU = 2 + val DCACHE_PORTS = 3 + val DCACHE_CPU = 0 + val DCACHE_PTW = 1 + val DCACHE_VU = 2 + + val DMEM_PORTS = if (HAVE_VEC) 3 else 2 + val DMEM_DCACHE = 0 + val DMEM_ICACHE = 1 + val DMEM_VICACHE = 2 } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 412d7e11..8f7f49fc 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -30,8 +30,8 @@ class rocketProc()(implicit conf: Configuration) extends Component { vu = new vu() // cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new RRArbiter(3)({new ioDTLB_CPU_req_bundle()}) - val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2Up(3))) + val dtlbarb = new RRArbiter(DTLB_PORTS)({new ioDTLB_CPU_req_bundle()}) + val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2Up(DTLB_PORTS))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } // tlb respones come out a cycle later @@ -86,15 +86,15 @@ class rocketProc()(implicit conf: Configuration) extends Component dtlb.io.invalidate := dpath.io.ptbr_wen dtlb.io.status := dpath.io.ctrl.status - arb.io.requestor(DMEM_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn - ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req.ready + arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn + ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DCACHE_CPU).req.ready // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) ptw.io.requestor(0) <> itlb.io.ptw ptw.io.requestor(1) <> dtlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; - arb.io.requestor(DMEM_PTW) <> ptw.io.mem + arb.io.requestor(DCACHE_PTW) <> ptw.io.mem arb.io.mem <> io.dmem ctrl.io.dpath <> dpath.io.ctrl; @@ -119,17 +119,17 @@ class rocketProc()(implicit conf: Configuration) extends Component // connect arbiter to ctrl+dpath+DTLB //TODO: views on nested bundles? - arb.io.requestor(DMEM_CPU).resp <> ctrl.io.dmem.resp - arb.io.requestor(DMEM_CPU).xcpt <> ctrl.io.dmem.xcpt - arb.io.requestor(DMEM_CPU).resp <> dpath.io.dmem.resp - arb.io.requestor(DMEM_CPU).req.valid := ctrl.io.dmem.req.valid - ctrl.io.dmem.req.ready := arb.io.requestor(DMEM_CPU).req.ready - arb.io.requestor(DMEM_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill - arb.io.requestor(DMEM_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd - arb.io.requestor(DMEM_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ - arb.io.requestor(DMEM_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx - arb.io.requestor(DMEM_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag - arb.io.requestor(DMEM_CPU).req.bits.data := dpath.io.dmem.req.bits.data + arb.io.requestor(DCACHE_CPU).resp <> ctrl.io.dmem.resp + arb.io.requestor(DCACHE_CPU).xcpt <> ctrl.io.dmem.xcpt + arb.io.requestor(DCACHE_CPU).resp <> dpath.io.dmem.resp + arb.io.requestor(DCACHE_CPU).req.valid := ctrl.io.dmem.req.valid + ctrl.io.dmem.req.ready := arb.io.requestor(DCACHE_CPU).req.ready + arb.io.requestor(DCACHE_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill + arb.io.requestor(DCACHE_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd + arb.io.requestor(DCACHE_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ + arb.io.requestor(DCACHE_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx + arb.io.requestor(DCACHE_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag + arb.io.requestor(DCACHE_CPU).req.bits.data := dpath.io.dmem.req.bits.data var fpu: rocketFPU = null if (HAVE_FPU) @@ -217,21 +217,21 @@ class rocketProc()(implicit conf: Configuration) extends Component storegen.io.typ := vu.io.dmem_req.bits.typ storegen.io.din := vu.io.dmem_req.bits.data - arb.io.requestor(DMEM_VU).req.valid := vu.io.dmem_req.valid - arb.io.requestor(DMEM_VU).req.bits.kill := vu.io.dmem_req.bits.kill - arb.io.requestor(DMEM_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd - arb.io.requestor(DMEM_VU).req.bits.typ := vu.io.dmem_req.bits.typ - arb.io.requestor(DMEM_VU).req.bits.idx := vu.io.dmem_req.bits.idx - arb.io.requestor(DMEM_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(DMEM_VU).req.bits.data := Reg(storegen.io.dout) - arb.io.requestor(DMEM_VU).req.bits.tag := vu.io.dmem_req.bits.tag + arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid + arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill + arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd + arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ + arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx + arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) + arb.io.requestor(DCACHE_VU).req.bits.data := Reg(storegen.io.dout) + arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag - vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req.ready - vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp.valid) - vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp.bits.nack - vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp.bits.data_subword - vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp.bits.tag) - vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp.bits.typ) + vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready + vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid) + vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack + vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword + vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag) + vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -243,7 +243,7 @@ class rocketProc()(implicit conf: Configuration) extends Component } else { - arb.io.requestor(DMEM_VU).req.valid := Bool(false) + arb.io.requestor(DCACHE_VU).req.valid := Bool(false) if (HAVE_FPU) { fpu.io.sfma.valid := Bool(false) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 108c5860..aaa06dcd 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -6,20 +6,19 @@ import scala.math._ //TODO: When compiler bug SI-5604 is fixed in 2.10, change object Constants to // package object rocket and remove import Constants._'s from other files -object Constants extends HTIFConstants with +object Constants extends + ScalarOpConstants with MemoryOpConstants with PCRConstants with InterruptConstants with AddressConstants with - ArbiterConstants with VectorOpConstants with TLBConstants with - ScalarOpConstants with MemoryInterfaceConstants { - val HAVE_RVC = false - val HAVE_FPU = true - val HAVE_VEC = true + def HAVE_RVC = false + def HAVE_FPU = true + def HAVE_VEC = true val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 92550d77..11d8f0b8 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,72 +5,6 @@ import Node._ import Constants._ import scala.math._ -class ioHellaCacheArbiter(n: Int) extends Bundle -{ - val requestor = Vec(n) { new ioHellaCache() }.flip - val mem = new ioHellaCache -} - -class rocketHellaCacheArbiter(n: Int) extends Component -{ - val io = new ioHellaCacheArbiter(n) - require(DCACHE_TAG_BITS >= log2Up(n) + CPU_TAG_BITS) - - var req_val = Bool(false) - var req_rdy = io.mem.req.ready - for (i <- 0 until n) - { - io.requestor(i).req.ready := req_rdy - req_val = req_val || io.requestor(i).req.valid - req_rdy = req_rdy && !io.requestor(i).req.valid - } - - var req_cmd = io.requestor(n-1).req.bits.cmd - var req_type = io.requestor(n-1).req.bits.typ - var req_idx = io.requestor(n-1).req.bits.idx - var req_ppn = io.requestor(n-1).req.bits.ppn - var req_data = io.requestor(n-1).req.bits.data - var req_kill = io.requestor(n-1).req.bits.kill - var req_tag = io.requestor(n-1).req.bits.tag - for (i <- n-1 to 0 by -1) - { - val r = io.requestor(i).req - req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) - req_type = Mux(r.valid, r.bits.typ, req_type) - req_idx = Mux(r.valid, r.bits.idx, req_idx) - req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) - req_data = Mux(Reg(r.valid), r.bits.data, req_data) - req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) - req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) - } - - io.mem.req.valid := req_val - io.mem.req.bits.cmd := req_cmd - io.mem.req.bits.typ := req_type - io.mem.req.bits.idx := req_idx - io.mem.req.bits.ppn := req_ppn - io.mem.req.bits.data := req_data - io.mem.req.bits.kill := req_kill - io.mem.req.bits.tag := req_tag - - for (i <- 0 until n) - { - val r = io.requestor(i).resp - val x = io.requestor(i).xcpt - val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) - x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) - x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) - r.valid := io.mem.resp.valid && tag_hit - r.bits.miss := io.mem.resp.bits.miss && tag_hit - r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) - r.bits.replay := io.mem.resp.bits.replay && tag_hit - r.bits.data := io.mem.resp.bits.data - r.bits.data_subword := io.mem.resp.bits.data_subword - r.bits.typ := io.mem.resp.bits.typ - r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) - } -} - class ioPTW(n: Int) extends Bundle { val requestor = Vec(n) { new ioTLB_PTW }.flip diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 30e961d6..69aef22d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -16,9 +16,9 @@ class Tile(resetSignal: Bool = null)(implicit conf: Configuration) extends Compo val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) val dcache = new HellaCache - val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) - arbiter.io.requestor(0) <> dcache.io.mem - arbiter.io.requestor(1) <> icache.io.mem + val arbiter = new rocketMemArbiter(DMEM_PORTS) + arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem + arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem io.tilelink.xact_init <> arbiter.io.mem.xact_init io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data @@ -32,7 +32,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: Configuration) extends Compo if (HAVE_VEC) { val vicache = new rocketICache(128, 1) // 128 sets x 1 ways (8KB) - arbiter.io.requestor(2) <> vicache.io.mem + arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 35c41e79..7b29021c 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -6,8 +6,8 @@ import Constants._ import uncore._ import collection.mutable.ArrayBuffer -object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants { -// val NTILES = 1 +object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants with rocket.constants.MulticoreConstants { + val NTILES = 1 val ENABLE_SHARING = true val ENABLE_CLEAN_EXCLUSIVE = true }