Add Instruction Tightly Integrated Memory
This commit is contained in:
		@@ -42,10 +42,15 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
 | 
				
			|||||||
  val acquire = Bool(INPUT)
 | 
					  val acquire = Bool(INPUT)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Frontend(implicit p: Parameters) extends LazyModule {
 | 
					class Frontend(hartid: Int)(implicit p: Parameters) extends LazyModule {
 | 
				
			||||||
  lazy val module = new FrontendModule(this)
 | 
					  lazy val module = new FrontendModule(this)
 | 
				
			||||||
  val icache = LazyModule(new ICache(latency = 2))
 | 
					  val icache = LazyModule(new ICache(latency = 2, hartid))
 | 
				
			||||||
  val node = TLOutputNode()
 | 
					  val node = TLOutputNode()
 | 
				
			||||||
 | 
					  val slaveNode = icache.slaveNode.map { n =>
 | 
				
			||||||
 | 
					    val res = TLInputNode()
 | 
				
			||||||
 | 
					    n := res
 | 
				
			||||||
 | 
					    res
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  node := icache.node
 | 
					  node := icache.node
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -53,8 +58,10 @@ class Frontend(implicit p: Parameters) extends LazyModule {
 | 
				
			|||||||
class FrontendBundle(outer: Frontend) extends CoreBundle()(outer.p) {
 | 
					class FrontendBundle(outer: Frontend) extends CoreBundle()(outer.p) {
 | 
				
			||||||
  val cpu = new FrontendIO().flip
 | 
					  val cpu = new FrontendIO().flip
 | 
				
			||||||
  val ptw = new TLBPTWIO()
 | 
					  val ptw = new TLBPTWIO()
 | 
				
			||||||
  val mem = outer.node.bundleOut
 | 
					  val tl_out = outer.node.bundleOut
 | 
				
			||||||
 | 
					  val tl_in = outer.slaveNode.map(_.bundleIn)
 | 
				
			||||||
  val resetVector = UInt(INPUT, vaddrBitsExtended)
 | 
					  val resetVector = UInt(INPUT, vaddrBitsExtended)
 | 
				
			||||||
 | 
					  val hartid = UInt(INPUT, p(XLen))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
					class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			||||||
@@ -68,8 +75,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 3, flow = true)) }
 | 
					  val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 3, flow = true)) }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s0_valid = io.cpu.req.valid || fq.io.enq.ready
 | 
					  val s0_valid = io.cpu.req.valid || fq.io.enq.ready
 | 
				
			||||||
  val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
 | 
					  val s1_pc = Reg(UInt(width=vaddrBitsExtended))
 | 
				
			||||||
  val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
 | 
					 | 
				
			||||||
  val s1_speculative = Reg(Bool())
 | 
					  val s1_speculative = Reg(Bool())
 | 
				
			||||||
  val s2_valid = Reg(init=Bool(true))
 | 
					  val s2_valid = Reg(init=Bool(true))
 | 
				
			||||||
  val s2_pc = Reg(init=io.resetVector)
 | 
					  val s2_pc = Reg(init=io.resetVector)
 | 
				
			||||||
@@ -94,7 +100,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  s2_replay := (s2_valid && !fq.io.enq.fire()) || RegNext(s2_replay && !s0_valid)
 | 
					  s2_replay := (s2_valid && !fq.io.enq.fire()) || RegNext(s2_replay && !s0_valid)
 | 
				
			||||||
  val npc = Mux(s2_replay, s2_pc, predicted_npc)
 | 
					  val npc = Mux(s2_replay, s2_pc, predicted_npc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  s1_pc_ := io.cpu.npc
 | 
					  s1_pc := io.cpu.npc
 | 
				
			||||||
  // consider RVC fetches across blocks to be non-speculative if the first
 | 
					  // consider RVC fetches across blocks to be non-speculative if the first
 | 
				
			||||||
  // part was non-speculative
 | 
					  // part was non-speculative
 | 
				
			||||||
  val s0_speculative =
 | 
					  val s0_speculative =
 | 
				
			||||||
@@ -116,7 +122,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  if (usingBTB) {
 | 
					  if (usingBTB) {
 | 
				
			||||||
    val btb = Module(new BTB)
 | 
					    val btb = Module(new BTB)
 | 
				
			||||||
    btb.io.req.valid := false
 | 
					    btb.io.req.valid := false
 | 
				
			||||||
    btb.io.req.bits.addr := s1_pc_
 | 
					    btb.io.req.bits.addr := s1_pc
 | 
				
			||||||
    btb.io.btb_update := io.cpu.btb_update
 | 
					    btb.io.btb_update := io.cpu.btb_update
 | 
				
			||||||
    btb.io.bht_update := io.cpu.bht_update
 | 
					    btb.io.bht_update := io.cpu.bht_update
 | 
				
			||||||
    btb.io.ras_update := io.cpu.ras_update
 | 
					    btb.io.ras_update := io.cpu.ras_update
 | 
				
			||||||
@@ -148,16 +154,18 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  tlb.io.req.bits.sfence := io.cpu.sfence
 | 
					  tlb.io.req.bits.sfence := io.cpu.sfence
 | 
				
			||||||
  tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
 | 
					  tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  icache.io.hartid := io.hartid
 | 
				
			||||||
  icache.io.req.valid := s0_valid
 | 
					  icache.io.req.valid := s0_valid
 | 
				
			||||||
  icache.io.req.bits.addr := io.cpu.npc
 | 
					  icache.io.req.bits.addr := io.cpu.npc
 | 
				
			||||||
  icache.io.invalidate := io.cpu.flush_icache
 | 
					  icache.io.invalidate := io.cpu.flush_icache
 | 
				
			||||||
  icache.io.s1_paddr := tlb.io.resp.paddr
 | 
					  icache.io.s1_paddr := tlb.io.resp.paddr
 | 
				
			||||||
 | 
					  icache.io.s2_vaddr := s2_pc
 | 
				
			||||||
  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
 | 
					  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
 | 
				
			||||||
  icache.io.s2_kill := s2_speculative && !s2_cacheable || s2_xcpt
 | 
					  icache.io.s2_kill := s2_speculative && !s2_cacheable || s2_xcpt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
 | 
					  fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
 | 
				
			||||||
  fq.io.enq.bits.pc := s2_pc
 | 
					  fq.io.enq.bits.pc := s2_pc
 | 
				
			||||||
  io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
 | 
					  io.cpu.npc := ~(~Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) | (coreInstBytes-1)) // discard LSB(s)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  fq.io.enq.bits.data := icache.io.resp.bits
 | 
					  fq.io.enq.bits.data := icache.io.resp.bits
 | 
				
			||||||
  fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
 | 
					  fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
 | 
				
			||||||
@@ -170,13 +178,14 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  io.cpu.resp <> fq.io.deq
 | 
					  io.cpu.resp <> fq.io.deq
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // performance events
 | 
					  // performance events
 | 
				
			||||||
  io.cpu.acquire := edge.done(icache.io.mem(0).a)
 | 
					  io.cpu.acquire := edge.done(icache.io.tl_out(0).a)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
 | 
					/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
 | 
				
			||||||
trait HasICacheFrontend extends CanHavePTW with HasTileLinkMasterPort {
 | 
					trait HasICacheFrontend extends CanHavePTW with HasTileLinkMasterPort {
 | 
				
			||||||
  val module: HasICacheFrontendModule
 | 
					  val module: HasICacheFrontendModule
 | 
				
			||||||
  val frontend = LazyModule(new Frontend)
 | 
					  val frontend = LazyModule(new Frontend(hartid: Int))
 | 
				
			||||||
 | 
					  val hartid: Int
 | 
				
			||||||
  masterNode := frontend.node
 | 
					  masterNode := frontend.node
 | 
				
			||||||
  nPTWPorts += 1
 | 
					  nPTWPorts += 1
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -19,6 +19,7 @@ case class ICacheParams(
 | 
				
			|||||||
    nTLBEntries: Int = 32,
 | 
					    nTLBEntries: Int = 32,
 | 
				
			||||||
    cacheIdBits: Int = 0,
 | 
					    cacheIdBits: Int = 0,
 | 
				
			||||||
    ecc: Option[Code] = None,
 | 
					    ecc: Option[Code] = None,
 | 
				
			||||||
 | 
					    itimAddr: Option[BigInt] = None,
 | 
				
			||||||
    blockBytes: Int = 64) extends L1CacheParams {
 | 
					    blockBytes: Int = 64) extends L1CacheParams {
 | 
				
			||||||
  def replacement = new RandomReplacement(nWays)
 | 
					  def replacement = new RandomReplacement(nWays)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -31,65 +32,115 @@ class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICache
 | 
				
			|||||||
  val addr = UInt(width = vaddrBits)
 | 
					  val addr = UInt(width = vaddrBits)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICacheParameters {
 | 
					class ICache(val latency: Int, val hartid: Int)(implicit p: Parameters) extends LazyModule
 | 
				
			||||||
  val data = Bits(width = coreInstBits)
 | 
					    with HasRocketCoreParameters {
 | 
				
			||||||
  val datablock = Bits(width = rowBits)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ICache(val latency: Int)(implicit p: Parameters) extends LazyModule {
 | 
					 | 
				
			||||||
  lazy val module = new ICacheModule(this)
 | 
					  lazy val module = new ICacheModule(this)
 | 
				
			||||||
  val node = TLClientNode(TLClientParameters(sourceId = IdRange(0,1)))
 | 
					  val node = TLClientNode(TLClientParameters(sourceId = IdRange(0,1)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  val icacheParams = tileParams.icache.get
 | 
				
			||||||
 | 
					  val slaveNode = icacheParams.itimAddr.map { itimAddr =>
 | 
				
			||||||
 | 
					    val wordBytes = coreInstBytes * fetchWidth
 | 
				
			||||||
 | 
					    val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes
 | 
				
			||||||
 | 
					    TLManagerNode(Seq(TLManagerPortParameters(
 | 
				
			||||||
 | 
					      Seq(TLManagerParameters(
 | 
				
			||||||
 | 
					        address         = Seq(AddressSet(itimAddr, size-1)),
 | 
				
			||||||
 | 
					        regionType      = RegionType.UNCACHED,
 | 
				
			||||||
 | 
					        executable      = true,
 | 
				
			||||||
 | 
					        supportsPutFull = TransferSizes(1, wordBytes),
 | 
				
			||||||
 | 
					        supportsGet     = TransferSizes(1, wordBytes),
 | 
				
			||||||
 | 
					        fifoId          = Some(0))), // requests handled in FIFO order
 | 
				
			||||||
 | 
					      beatBytes = wordBytes,
 | 
				
			||||||
 | 
					      minLatency = 1)))
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
 | 
					class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
 | 
				
			||||||
  val req = Valid(new ICacheReq).flip
 | 
					  val hartid = UInt(INPUT, p(XLen))
 | 
				
			||||||
 | 
					  val req = Decoupled(new ICacheReq).flip
 | 
				
			||||||
  val s1_paddr = UInt(INPUT, paddrBits) // delayed one cycle w.r.t. req
 | 
					  val s1_paddr = UInt(INPUT, paddrBits) // delayed one cycle w.r.t. req
 | 
				
			||||||
 | 
					  val s2_vaddr = UInt(INPUT, vaddrBits) // delayed two cycles w.r.t. req
 | 
				
			||||||
  val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
 | 
					  val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
 | 
				
			||||||
  val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
 | 
					  val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val resp = Valid(UInt(width = coreInstBits * fetchWidth))
 | 
					  val resp = Valid(UInt(width = coreInstBits * fetchWidth))
 | 
				
			||||||
  val invalidate = Bool(INPUT)
 | 
					  val invalidate = Bool(INPUT)
 | 
				
			||||||
  val mem = outer.node.bundleOut
 | 
					  val tl_out = outer.node.bundleOut
 | 
				
			||||||
 | 
					  val tl_in = outer.slaveNode.map(_.bundleIn)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// get a tile-specific property without breaking deduplication
 | 
				
			||||||
 | 
					object GetPropertyByHartId {
 | 
				
			||||||
 | 
					  def apply[T <: Data](tiles: Seq[RocketTileParams], f: RocketTileParams => Option[T], hartId: UInt): T = {
 | 
				
			||||||
 | 
					    PriorityMux(tiles.zipWithIndex.collect { case (t, i) if f(t).nonEmpty => (hartId === i) -> f(t).get })
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
					class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			||||||
    with HasL1ICacheParameters {
 | 
					    with HasL1ICacheParameters {
 | 
				
			||||||
  val io = new ICacheBundle(outer)
 | 
					  val io = new ICacheBundle(outer)
 | 
				
			||||||
  val edge = outer.node.edgesOut(0)
 | 
					  val edge_out = outer.node.edgesOut.head
 | 
				
			||||||
  val tl_out = io.mem(0)
 | 
					  val tl_out = io.tl_out.head
 | 
				
			||||||
 | 
					  val edge_in = outer.slaveNode.map(_.edgesIn.head)
 | 
				
			||||||
 | 
					  val tl_in = io.tl_in.map(_.head)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  require(isPow2(nSets) && isPow2(nWays))
 | 
					  require(isPow2(nSets) && isPow2(nWays))
 | 
				
			||||||
  require(isPow2(coreInstBytes))
 | 
					  require(isPow2(coreInstBytes))
 | 
				
			||||||
  require(!usingVM || pgIdxBits >= untagBits)
 | 
					  require(!usingVM || pgIdxBits >= untagBits)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  val scratchpadOn = RegInit(false.B)
 | 
				
			||||||
 | 
					  val scratchpadMax = Reg(UInt(width = log2Ceil(nSets * (nWays - 1))))
 | 
				
			||||||
 | 
					  def lineInScratchpad(line: UInt) = scratchpadOn && line <= scratchpadMax
 | 
				
			||||||
 | 
					  def addrMaybeInScratchpad(addr: UInt) = if (outer.icacheParams.itimAddr.isEmpty) false.B else {
 | 
				
			||||||
 | 
					    val base = GetPropertyByHartId(p(coreplex.RocketTilesKey), _.icache.flatMap(_.itimAddr.map(_.U)), io.hartid)
 | 
				
			||||||
 | 
					    val size = nSets * nWays * cacheBlockBytes
 | 
				
			||||||
 | 
					    addr >= base && addr < base + size
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  def addrInScratchpad(addr: UInt) = addrMaybeInScratchpad(addr) && lineInScratchpad(addr(untagBits+log2Ceil(nWays)-1, blockOffBits))
 | 
				
			||||||
 | 
					  def scratchpadWay(addr: UInt) = addr(untagBits+log2Ceil(nWays)-1, untagBits)
 | 
				
			||||||
 | 
					  def scratchpadWayValid(way: UInt) = way < nWays - 1
 | 
				
			||||||
 | 
					  def scratchpadLine(addr: UInt) = addr(untagBits+log2Ceil(nWays)-1, blockOffBits)
 | 
				
			||||||
 | 
					  val s0_slaveValid = tl_in.map(_.a.fire()).getOrElse(false.B)
 | 
				
			||||||
 | 
					  val s1_slaveValid = RegNext(s0_slaveValid, false.B)
 | 
				
			||||||
 | 
					  val s2_slaveValid = RegNext(s1_slaveValid, false.B)
 | 
				
			||||||
 | 
					  val s3_slaveValid = RegNext(s2_slaveValid, false.B)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s_ready :: s_request :: s_refill :: Nil = Enum(UInt(), 3)
 | 
					  val s_ready :: s_request :: s_refill :: Nil = Enum(UInt(), 3)
 | 
				
			||||||
  val state = Reg(init=s_ready)
 | 
					  val state = Reg(init=s_ready)
 | 
				
			||||||
  val invalidated = Reg(Bool())
 | 
					  val invalidated = Reg(Bool())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val refill_addr = Reg(UInt(width = paddrBits))
 | 
					  val refill_addr = Reg(UInt(width = paddrBits))
 | 
				
			||||||
 | 
					  val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
 | 
				
			||||||
 | 
					  val refill_idx = refill_addr(untagBits-1,blockOffBits)
 | 
				
			||||||
  val s1_tag_hit = Wire(Vec(nWays, Bool()))
 | 
					  val s1_tag_hit = Wire(Vec(nWays, Bool()))
 | 
				
			||||||
  val s1_any_tag_hit = s1_tag_hit.reduce(_||_)
 | 
					  val s1_any_tag_hit = s1_tag_hit.reduce(_||_) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s1_valid = Reg(init=Bool(false))
 | 
					  val s1_valid = Reg(init=Bool(false))
 | 
				
			||||||
  val out_valid = s1_valid && state === s_ready && !io.s1_kill
 | 
					  val s1_hit = s1_valid && s1_any_tag_hit
 | 
				
			||||||
  val s1_idx = io.s1_paddr(untagBits-1,blockOffBits)
 | 
					 | 
				
			||||||
  val s1_tag = io.s1_paddr(tagBits+untagBits-1,untagBits)
 | 
					 | 
				
			||||||
  val s1_hit = out_valid && s1_any_tag_hit
 | 
					 | 
				
			||||||
  val s1_miss = s1_valid && state === s_ready && !s1_any_tag_hit
 | 
					  val s1_miss = s1_valid && state === s_ready && !s1_any_tag_hit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s0_valid = io.req.valid && state === s_ready
 | 
					  io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
 | 
				
			||||||
 | 
					  val s0_valid = io.req.fire()
 | 
				
			||||||
  val s0_vaddr = io.req.bits.addr
 | 
					  val s0_vaddr = io.req.bits.addr
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  s1_valid := s0_valid
 | 
					  s1_valid := s0_valid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  when (s1_miss) { refill_addr := io.s1_paddr }
 | 
					  when (s1_miss) { refill_addr := io.s1_paddr }
 | 
				
			||||||
  val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
 | 
					  val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
 | 
				
			||||||
  val refill_idx = refill_addr(untagBits-1,blockOffBits)
 | 
					  tl_out.d.ready := !s3_slaveValid
 | 
				
			||||||
  val (_, _, refill_done, refill_cnt) = edge.count(tl_out.d)
 | 
					  require (edge_out.manager.minLatency > 0)
 | 
				
			||||||
  tl_out.d.ready := Bool(true)
 | 
					
 | 
				
			||||||
  require (edge.manager.minLatency > 0)
 | 
					  val repl_way = if (isDM) UInt(0) else {
 | 
				
			||||||
 | 
					    // pick a way that is not used by the scratchpad
 | 
				
			||||||
 | 
					    val v0 = LFSR16(tl_out.a.fire())(log2Up(nWays)-1,0)
 | 
				
			||||||
 | 
					    var v = v0
 | 
				
			||||||
 | 
					    for (i <- log2Ceil(nWays) - 1 to 0 by -1) {
 | 
				
			||||||
 | 
					      val mask = nWays - (BigInt(1) << (i + 1))
 | 
				
			||||||
 | 
					      v = v | (lineInScratchpad(Cat(v0 | mask.U, refill_idx)) << i)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    assert(!lineInScratchpad(Cat(v, refill_idx)))
 | 
				
			||||||
 | 
					    v
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
 | 
					 | 
				
			||||||
  val entagbits = code.width(tagBits)
 | 
					  val entagbits = code.width(tagBits)
 | 
				
			||||||
  val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits)))
 | 
					  val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits)))
 | 
				
			||||||
  val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
 | 
					  val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
 | 
				
			||||||
@@ -99,8 +150,9 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val vb_array = Reg(init=Bits(0, nSets*nWays))
 | 
					  val vb_array = Reg(init=Bits(0, nSets*nWays))
 | 
				
			||||||
  when (refill_done && !invalidated) {
 | 
					  when (tl_out.d.fire()) {
 | 
				
			||||||
    vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), Bool(true))
 | 
					    // clear bit when refill starts so hit-under-miss doesn't fetch bad data
 | 
				
			||||||
 | 
					    vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  val invalidate = Wire(init = io.invalidate)
 | 
					  val invalidate = Wire(init = io.invalidate)
 | 
				
			||||||
  when (invalidate) {
 | 
					  when (invalidate) {
 | 
				
			||||||
@@ -108,75 +160,130 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			|||||||
    invalidated := Bool(true)
 | 
					    invalidated := Bool(true)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  val s0_slaveAddr = tl_in.map(_.a.bits.address).getOrElse(0.U)
 | 
				
			||||||
 | 
					  val s1s3_slaveAddr = Reg(UInt())
 | 
				
			||||||
 | 
					  val s1s3_slaveData = Reg(UInt())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s1_tag_disparity = Wire(Vec(nWays, Bool()))
 | 
					  val s1_tag_disparity = Wire(Vec(nWays, Bool()))
 | 
				
			||||||
  val wordBits = coreInstBits * fetchWidth
 | 
					  val wordBits = coreInstBits * fetchWidth
 | 
				
			||||||
  val s1_dout = Wire(Vec(nWays, UInt(width = code.width(wordBits))))
 | 
					  val s1_dout = Wire(Vec(nWays, UInt(width = code.width(wordBits))))
 | 
				
			||||||
  val s1_dout_valid = RegNext(s0_valid)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (i <- 0 until nWays) {
 | 
					  for (i <- 0 until nWays) {
 | 
				
			||||||
    val s1_vb = vb_array(Cat(UInt(i), io.s1_paddr(untagBits-1,blockOffBits))).toBool
 | 
					    val s1_idx = io.s1_paddr(untagBits-1,blockOffBits)
 | 
				
			||||||
    s1_tag_disparity(i) := code.decode(tag_rdata(i)).error
 | 
					    val s1_tag = io.s1_paddr(tagBits+untagBits-1,untagBits)
 | 
				
			||||||
    s1_tag_hit(i) := s1_vb && code.decode(tag_rdata(i)).uncorrected === s1_tag
 | 
					    val scratchpadHit = Bool(i < nWays-1) &&
 | 
				
			||||||
 | 
					      Mux(s1_slaveValid,
 | 
				
			||||||
 | 
					        lineInScratchpad(scratchpadLine(s1s3_slaveAddr)) && scratchpadWay(s1s3_slaveAddr) === i,
 | 
				
			||||||
 | 
					        addrInScratchpad(io.s1_paddr) && scratchpadWay(io.s1_paddr) === i)
 | 
				
			||||||
 | 
					    val s1_vb = vb_array(Cat(UInt(i), s1_idx)) && !s1_slaveValid
 | 
				
			||||||
 | 
					    s1_tag_disparity(i) := s1_vb && code.decode(tag_rdata(i)).error
 | 
				
			||||||
 | 
					    s1_tag_hit(i) := scratchpadHit || (s1_vb && code.decode(tag_rdata(i)).uncorrected === s1_tag)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  assert(!(s1_valid || s1_slaveValid) || PopCount(s1_tag_hit zip s1_tag_disparity map { case (h, d) => h && !d }) <= 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  require(rowBits % wordBits == 0)
 | 
					  require(tl_out.d.bits.data.getWidth % wordBits == 0)
 | 
				
			||||||
  val data_arrays = Seq.fill(rowBits / wordBits) { SeqMem(nSets * refillCycles, Vec(nWays, UInt(width = code.width(wordBits)))) }
 | 
					  val data_arrays = Seq.fill(tl_out.d.bits.data.getWidth / wordBits) { SeqMem(nSets * refillCycles, Vec(nWays, UInt(width = code.width(wordBits)))) }
 | 
				
			||||||
  for ((data_array, i) <- data_arrays zipWithIndex) {
 | 
					  for ((data_array, i) <- data_arrays zipWithIndex) {
 | 
				
			||||||
    val wen = tl_out.d.valid
 | 
					    def wordMatch(addr: UInt) = addr.extract(log2Ceil(tl_out.d.bits.data.getWidth/8)-1, log2Ceil(wordBits/8)) === i
 | 
				
			||||||
 | 
					    def row(addr: UInt) = addr(untagBits-1, blockOffBits-log2Ceil(refillCycles))
 | 
				
			||||||
 | 
					    val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr))
 | 
				
			||||||
 | 
					    val wen = (tl_out.d.fire() && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
 | 
				
			||||||
 | 
					    val mem_idx = Mux(tl_out.d.fire(), (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
 | 
				
			||||||
 | 
					                  Mux(s3_slaveValid, row(s1s3_slaveAddr),
 | 
				
			||||||
 | 
					                  Mux(s0_slaveValid, row(s0_slaveAddr),
 | 
				
			||||||
 | 
					                  row(s0_vaddr))))
 | 
				
			||||||
    when (wen) {
 | 
					    when (wen) {
 | 
				
			||||||
      val idx = (refill_idx << log2Ceil(refillCycles)) | refill_cnt
 | 
					      val data = Mux(s3_slaveValid, s1s3_slaveData, tl_out.d.bits.data(wordBits*(i+1)-1, wordBits*i))
 | 
				
			||||||
      val data = tl_out.d.bits.data(wordBits*(i+1)-1, wordBits*i)
 | 
					      val way = Mux(s3_slaveValid, scratchpadWay(s1s3_slaveAddr), repl_way)
 | 
				
			||||||
      data_array.write(idx, Vec.fill(nWays)(code.encode(data)), (0 until nWays).map(repl_way === _))
 | 
					      data_array.write(mem_idx, Vec.fill(nWays)(code.encode(data)), (0 until nWays).map(way === _))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    def wordMatch(addr: UInt) = addr.extract(log2Ceil(rowBytes)-1, log2Ceil(wordBits/8)) === i
 | 
					    val dout = data_array.read(mem_idx, !wen && s0_ren)
 | 
				
			||||||
    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
 | 
					    when (wordMatch(Mux(s1_slaveValid, s1s3_slaveAddr, io.s1_paddr))) {
 | 
				
			||||||
    val dout = data_array.read(s0_raddr, !wen && (s0_valid && wordMatch(s0_vaddr)))
 | 
					 | 
				
			||||||
    when (wordMatch(io.s1_paddr)) {
 | 
					 | 
				
			||||||
      s1_dout := dout
 | 
					      s1_dout := dout
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
  for ((data_array, i) <- data_arrays zipWithIndex) {
 | 
					 | 
				
			||||||
    val wen = tl_out.d.valid && repl_way === UInt(i)
 | 
					 | 
				
			||||||
    when (wen) {
 | 
					 | 
				
			||||||
      val e_d = code.encode(tl_out.d.bits.data)
 | 
					 | 
				
			||||||
      data_array.write((refill_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
 | 
					 | 
				
			||||||
    s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // output signals
 | 
					  // output signals
 | 
				
			||||||
  outer.latency match {
 | 
					  outer.latency match {
 | 
				
			||||||
    case 1 =>
 | 
					    case 1 =>
 | 
				
			||||||
      require(code.width(rowBits) == rowBits) // no ECC
 | 
					      require(code.isInstanceOf[uncore.util.IdentityCode])
 | 
				
			||||||
 | 
					      require(outer.icacheParams.itimAddr.isEmpty)
 | 
				
			||||||
      io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
 | 
					      io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
 | 
				
			||||||
      io.resp.valid := s1_hit
 | 
					      io.resp.valid := s1_hit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    case 2 =>
 | 
					    case 2 =>
 | 
				
			||||||
      val s2_valid = RegNext(out_valid, Bool(false))
 | 
					      val s2_valid = RegNext(s1_valid && !io.s1_kill, Bool(false))
 | 
				
			||||||
      val s2_hit = RegNext(s1_hit, Bool(false))
 | 
					      val s2_hit = RegNext(s1_hit, Bool(false))
 | 
				
			||||||
      val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid)
 | 
					      val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid || s1_slaveValid)
 | 
				
			||||||
      val s2_dout = RegEnable(s1_dout, s1_valid)
 | 
					      val s2_dout = RegEnable(s1_dout, s1_valid || s1_slaveValid)
 | 
				
			||||||
      val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
 | 
					      val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_valid).asUInt.orR
 | 
					      val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_valid || s1_slaveValid).asUInt.orR
 | 
				
			||||||
      val s2_data_disparity = code.decode(s2_way_mux).error
 | 
					      val s2_data_decoded = code.decode(s2_way_mux)
 | 
				
			||||||
      val s2_disparity = s2_tag_disparity || s2_data_disparity
 | 
					      val s2_disparity = s2_tag_disparity || s2_data_decoded.error
 | 
				
			||||||
      when (s2_valid && s2_disparity) { invalidate := true }
 | 
					      when (s2_valid && s2_disparity) { invalidate := true }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      io.resp.bits := code.decode(s2_way_mux).uncorrected
 | 
					      io.resp.bits := s2_data_decoded.uncorrected
 | 
				
			||||||
      io.resp.valid := s2_hit && !s2_disparity
 | 
					      io.resp.valid := s2_hit && !s2_disparity
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tl_in.map { tl =>
 | 
				
			||||||
 | 
					        tl.a.ready := !tl_out.d.fire() && !s1_slaveValid && !s2_slaveValid && !(tl.d.valid && !tl.d.ready)
 | 
				
			||||||
 | 
					        val s1_a = RegEnable(tl.a.bits, s0_slaveValid)
 | 
				
			||||||
 | 
					        when (s0_slaveValid) {
 | 
				
			||||||
 | 
					          val a = tl.a.bits
 | 
				
			||||||
 | 
					          s1s3_slaveAddr := tl.a.bits.address
 | 
				
			||||||
 | 
					          s1s3_slaveData := tl.a.bits.data
 | 
				
			||||||
 | 
					          when (edge_in.get.hasData(a)) {
 | 
				
			||||||
 | 
					            val enable = scratchpadWayValid(scratchpadWay(a.address))
 | 
				
			||||||
 | 
					            when (!lineInScratchpad(scratchpadLine(a.address))) {
 | 
				
			||||||
 | 
					              scratchpadMax := scratchpadLine(a.address)
 | 
				
			||||||
 | 
					              when (enable) { invalidate := true }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            scratchpadOn := enable
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        assert(!s2_valid || RegNext(RegNext(s0_vaddr)) === io.s2_vaddr)
 | 
				
			||||||
 | 
					        when (!(tl.a.valid || s1_slaveValid || s2_slaveValid)
 | 
				
			||||||
 | 
					              && s2_valid && s2_data_decoded.correctable && !s2_tag_disparity) {
 | 
				
			||||||
 | 
					          // handle correctable errors on CPU accesses to the scratchpad.
 | 
				
			||||||
 | 
					          // if there is an in-flight slave-port access to the scratchpad,
 | 
				
			||||||
 | 
					          // report a miss but don't correct the error (as there is
 | 
				
			||||||
 | 
					          // a structural hazard on s1s3_slaveData/s1s3_slaveAddr).
 | 
				
			||||||
 | 
					          s3_slaveValid := true
 | 
				
			||||||
 | 
					          s1s3_slaveData := s2_data_decoded.corrected
 | 
				
			||||||
 | 
					          s1s3_slaveAddr := Cat(OHToUInt(s2_tag_hit), io.s2_vaddr(untagBits-1, log2Ceil(wordBits/8)), s1s3_slaveAddr(log2Ceil(wordBits/8)-1, 0))
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        val respValid = RegInit(false.B)
 | 
				
			||||||
 | 
					        respValid := s2_slaveValid || (respValid && !tl.d.ready)
 | 
				
			||||||
 | 
					        when (s2_slaveValid) {
 | 
				
			||||||
 | 
					          when (edge_in.get.hasData(s1_a)) { s3_slaveValid := true }
 | 
				
			||||||
 | 
					          def byteEn(i: Int) = !(edge_in.get.hasData(s1_a) && s1_a.mask(i))
 | 
				
			||||||
 | 
					          s1s3_slaveData := (0 until wordBits/8).map(i => Mux(byteEn(i), s2_data_decoded.corrected, s1s3_slaveData)(8*(i+1)-1, 8*i)).asUInt
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        tl.d.valid := respValid
 | 
				
			||||||
 | 
					        tl.d.bits := Mux(edge_in.get.hasData(s1_a),
 | 
				
			||||||
 | 
					          edge_in.get.AccessAck(s1_a, UInt(0)),
 | 
				
			||||||
 | 
					          edge_in.get.AccessAck(s1_a, UInt(0), UInt(0)))
 | 
				
			||||||
 | 
					        tl.d.bits.data := s1s3_slaveData
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Tie off unused channels
 | 
				
			||||||
 | 
					        tl.b.valid := false
 | 
				
			||||||
 | 
					        tl.c.ready := true
 | 
				
			||||||
 | 
					        tl.e.ready := true
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  tl_out.a.valid := state === s_request && !io.s2_kill
 | 
					  tl_out.a.valid := state === s_request && !io.s2_kill
 | 
				
			||||||
  tl_out.a.bits := edge.Get(
 | 
					  tl_out.a.bits := edge_out.Get(
 | 
				
			||||||
                    fromSource = UInt(0),
 | 
					                    fromSource = UInt(0),
 | 
				
			||||||
                    toAddress = (refill_addr >> blockOffBits) << blockOffBits,
 | 
					                    toAddress = (refill_addr >> blockOffBits) << blockOffBits,
 | 
				
			||||||
                    lgSize = lgCacheBlockBytes)._2
 | 
					                    lgSize = lgCacheBlockBytes)._2
 | 
				
			||||||
  tl_out.b.ready := Bool(true)
 | 
					  tl_out.b.ready := Bool(true)
 | 
				
			||||||
  tl_out.c.valid := Bool(false)
 | 
					  tl_out.c.valid := Bool(false)
 | 
				
			||||||
  tl_out.e.valid := Bool(false)
 | 
					  tl_out.e.valid := Bool(false)
 | 
				
			||||||
 | 
					  assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // control state machine
 | 
					  // control state machine
 | 
				
			||||||
  switch (state) {
 | 
					  switch (state) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,8 +8,10 @@ import chisel3.core.withReset
 | 
				
			|||||||
import config._
 | 
					import config._
 | 
				
			||||||
import tile._
 | 
					import tile._
 | 
				
			||||||
import uncore.constants._
 | 
					import uncore.constants._
 | 
				
			||||||
 | 
					import diplomacy._
 | 
				
			||||||
import util._
 | 
					import util._
 | 
				
			||||||
import Chisel.ImplicitConversions._
 | 
					import Chisel.ImplicitConversions._
 | 
				
			||||||
 | 
					import collection.immutable.ListMap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
case class RocketCoreParams(
 | 
					case class RocketCoreParams(
 | 
				
			||||||
  bootFreqHz: BigInt = 0,
 | 
					  bootFreqHz: BigInt = 0,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -105,7 +105,7 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/** Mix-ins for constructing tiles that have optional scratchpads */
 | 
					/** Mix-ins for constructing tiles that have optional scratchpads */
 | 
				
			||||||
trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend {
 | 
					trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend with HasCoreParameters {
 | 
				
			||||||
  val module: CanHaveScratchpadModule
 | 
					  val module: CanHaveScratchpadModule
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val scratch = tileParams.dcache.flatMap(d => d.scratch.map(s =>
 | 
					  val scratch = tileParams.dcache.flatMap(d => d.scratch.map(s =>
 | 
				
			||||||
@@ -113,6 +113,10 @@ trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend {
 | 
				
			|||||||
  val slaveNode = TLInputNode()
 | 
					  val slaveNode = TLInputNode()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  scratch foreach { lm => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(slaveNode) }
 | 
					  scratch foreach { lm => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(slaveNode) }
 | 
				
			||||||
 | 
					  frontend.slaveNode foreach { _ :=
 | 
				
			||||||
 | 
					    TLFragmenter(fetchWidth*coreInstBytes, p(CacheBlockBytes), true)(
 | 
				
			||||||
 | 
					      TLWidthWidget(p(XLen)/8)(slaveNode))
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def findScratchpadFromICache: Option[AddressSet] = scratch.map { s =>
 | 
					  def findScratchpadFromICache: Option[AddressSet] = scratch.map { s =>
 | 
				
			||||||
    val finalNode = frontend.node.edgesOut(0).manager.managers.find(_.nodePath.last == s.node)
 | 
					    val finalNode = frontend.node.edgesOut(0).manager.managers.find(_.nodePath.last == s.node)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -132,6 +132,7 @@ class RocketTileModule(outer: RocketTile) extends BaseTileModule(outer, () => ne
 | 
				
			|||||||
  core.io.hartid := io.hartid // Pass through the hartid
 | 
					  core.io.hartid := io.hartid // Pass through the hartid
 | 
				
			||||||
  outer.frontend.module.io.cpu <> core.io.imem
 | 
					  outer.frontend.module.io.cpu <> core.io.imem
 | 
				
			||||||
  outer.frontend.module.io.resetVector := io.resetVector
 | 
					  outer.frontend.module.io.resetVector := io.resetVector
 | 
				
			||||||
 | 
					  outer.frontend.module.io.hartid := io.hartid
 | 
				
			||||||
  dcachePorts += core.io.dmem // TODO outer.dcachePorts += () => module.core.io.dmem ??
 | 
					  dcachePorts += core.io.dmem // TODO outer.dcachePorts += () => module.core.io.dmem ??
 | 
				
			||||||
  fpuOpt foreach { fpu => core.io.fpu <> fpu.io }
 | 
					  fpuOpt foreach { fpu => core.io.fpu <> fpu.io }
 | 
				
			||||||
  core.io.ptw <> ptw.io.dpath
 | 
					  core.io.ptw <> ptw.io.dpath
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user