Fetch smaller parcels from the I$
This commit is contained in:
		
				
					committed by
					
						
						Andrew Waterman
					
				
			
			
				
	
			
			
			
						parent
						
							c72b15f2a0
						
					
				
				
					commit
					061a0adceb
				
			@@ -68,7 +68,6 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
 | 
					  val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
 | 
				
			||||||
  val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
 | 
					  val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
 | 
				
			||||||
  val s1_speculative = Reg(Bool())
 | 
					  val s1_speculative = Reg(Bool())
 | 
				
			||||||
  val s1_same_block = Reg(Bool())
 | 
					 | 
				
			||||||
  val s2_valid = Reg(init=Bool(true))
 | 
					  val s2_valid = Reg(init=Bool(true))
 | 
				
			||||||
  val s2_pc = Reg(init=io.resetVector)
 | 
					  val s2_pc = Reg(init=io.resetVector)
 | 
				
			||||||
  val s2_btb_resp_valid = Reg(init=Bool(false))
 | 
					  val s2_btb_resp_valid = Reg(init=Bool(false))
 | 
				
			||||||
@@ -83,16 +82,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  val s2_cacheable = Reg(init=Bool(false))
 | 
					  val s2_cacheable = Reg(init=Bool(false))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
 | 
					  val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
 | 
				
			||||||
  val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes)
 | 
					 | 
				
			||||||
  val predicted_npc = Wire(init = ntpc)
 | 
					  val predicted_npc = Wire(init = ntpc)
 | 
				
			||||||
  val predicted_taken = Wire(init = Bool(false))
 | 
					  val predicted_taken = Wire(init = Bool(false))
 | 
				
			||||||
  val icmiss = s2_valid && !icache.io.resp.valid
 | 
					  val icmiss = s2_valid && !icache.io.resp.valid
 | 
				
			||||||
  val npc = Mux(icmiss, s2_pc, predicted_npc)
 | 
					  val npc = Mux(icmiss, s2_pc, predicted_npc)
 | 
				
			||||||
  val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val stall = io.cpu.resp.valid && !io.cpu.resp.ready
 | 
					  val stall = io.cpu.resp.valid && !io.cpu.resp.ready
 | 
				
			||||||
  when (!stall) {
 | 
					  when (!stall) {
 | 
				
			||||||
    s1_same_block := s0_same_block && !tlb.io.resp.miss
 | 
					 | 
				
			||||||
    s1_pc_ := io.cpu.npc
 | 
					    s1_pc_ := io.cpu.npc
 | 
				
			||||||
    // consider RVC fetches across blocks to be non-speculative if the first
 | 
					    // consider RVC fetches across blocks to be non-speculative if the first
 | 
				
			||||||
    // part was non-speculative
 | 
					    // part was non-speculative
 | 
				
			||||||
@@ -111,7 +107,6 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  when (io.cpu.req.valid) {
 | 
					  when (io.cpu.req.valid) {
 | 
				
			||||||
    s1_same_block := Bool(false)
 | 
					 | 
				
			||||||
    s1_pc_ := io.cpu.npc
 | 
					    s1_pc_ := io.cpu.npc
 | 
				
			||||||
    s1_speculative := io.cpu.req.bits.speculative
 | 
					    s1_speculative := io.cpu.req.bits.speculative
 | 
				
			||||||
    s2_valid := Bool(false)
 | 
					    s2_valid := Bool(false)
 | 
				
			||||||
@@ -144,21 +139,20 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
 | 
				
			|||||||
  tlb.io.req.bits.sfence := io.cpu.sfence
 | 
					  tlb.io.req.bits.sfence := io.cpu.sfence
 | 
				
			||||||
  tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
 | 
					  tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  icache.io.req.valid := !stall && !s0_same_block
 | 
					  icache.io.req.valid := !stall
 | 
				
			||||||
  icache.io.req.bits.addr := io.cpu.npc
 | 
					  icache.io.req.bits.addr := io.cpu.npc
 | 
				
			||||||
  icache.io.invalidate := io.cpu.flush_icache
 | 
					  icache.io.invalidate := io.cpu.flush_icache
 | 
				
			||||||
  icache.io.s1_paddr := tlb.io.resp.paddr
 | 
					  icache.io.s1_paddr := tlb.io.resp.paddr
 | 
				
			||||||
  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || s1_speculative && !tlb.io.resp.cacheable || tlb.io.resp.pf.inst || tlb.io.resp.ae.inst
 | 
					  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || s1_speculative && !tlb.io.resp.cacheable || tlb.io.resp.pf.inst || tlb.io.resp.ae.inst
 | 
				
			||||||
  icache.io.s2_kill := false
 | 
					  icache.io.s2_kill := false
 | 
				
			||||||
  icache.io.resp.ready := !stall && !s1_same_block
 | 
					  icache.io.resp.ready := !stall
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s2_kill = s2_speculative && !s2_cacheable || s2_xcpt
 | 
					  val s2_kill = s2_speculative && !s2_cacheable || s2_xcpt
 | 
				
			||||||
  io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_kill)
 | 
					  io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_kill)
 | 
				
			||||||
  io.cpu.resp.bits.pc := s2_pc
 | 
					  io.cpu.resp.bits.pc := s2_pc
 | 
				
			||||||
  io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
 | 
					  io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth))
 | 
					  io.cpu.resp.bits.data := icache.io.resp.bits
 | 
				
			||||||
  io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Ceil(rowBytes)-1,log2Ceil(fetchWidth*coreInstBytes)) << log2Ceil(fetchWidth*coreInstBits))
 | 
					 | 
				
			||||||
  io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
 | 
					  io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
 | 
				
			||||||
  io.cpu.resp.bits.pf := s2_pf
 | 
					  io.cpu.resp.bits.pf := s2_pf
 | 
				
			||||||
  io.cpu.resp.bits.ae := s2_ae
 | 
					  io.cpu.resp.bits.ae := s2_ae
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,7 +47,7 @@ class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
 | 
				
			|||||||
  val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
 | 
					  val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
 | 
				
			||||||
  val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
 | 
					  val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val resp = Decoupled(new ICacheResp)
 | 
					  val resp = Decoupled(UInt(width = coreInstBits * fetchWidth))
 | 
				
			||||||
  val invalidate = Bool(INPUT)
 | 
					  val invalidate = Bool(INPUT)
 | 
				
			||||||
  val mem = outer.node.bundleOut
 | 
					  val mem = outer.node.bundleOut
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -110,7 +110,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s1_tag_disparity = Wire(Vec(nWays, Bool()))
 | 
					  val s1_tag_disparity = Wire(Vec(nWays, Bool()))
 | 
				
			||||||
  val s1_dout = Wire(Vec(nWays, UInt(width = code.width(rowBits))))
 | 
					  val wordBits = coreInstBits * fetchWidth
 | 
				
			||||||
 | 
					  val s1_dout = Wire(Vec(nWays, UInt(width = code.width(wordBits))))
 | 
				
			||||||
  val s1_dout_valid = RegNext(s0_valid)
 | 
					  val s1_dout_valid = RegNext(s0_valid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for (i <- 0 until nWays) {
 | 
					  for (i <- 0 until nWays) {
 | 
				
			||||||
@@ -119,7 +120,24 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			|||||||
    s1_tag_hit(i) := s1_vb && ((code.decode(tag_rdata(i)).uncorrected === s1_tag) holdUnless s1_dout_valid)
 | 
					    s1_tag_hit(i) := s1_vb && ((code.decode(tag_rdata(i)).uncorrected === s1_tag) holdUnless s1_dout_valid)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val data_arrays = Seq.fill(nWays) { SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits))) }
 | 
					  require(rowBits % wordBits == 0)
 | 
				
			||||||
 | 
					  val data_arrays = Seq.fill(rowBits / wordBits) { SeqMem(nSets * refillCycles, Vec(nWays, UInt(width = code.width(wordBits)))) }
 | 
				
			||||||
 | 
					  for ((data_array, i) <- data_arrays zipWithIndex) {
 | 
				
			||||||
 | 
					    val wen = tl_out.d.valid
 | 
				
			||||||
 | 
					    when (wen) {
 | 
				
			||||||
 | 
					      val idx = (refill_idx << log2Ceil(refillCycles)) | refill_cnt
 | 
				
			||||||
 | 
					      val data = tl_out.d.bits.data(wordBits*(i+1)-1, wordBits*i)
 | 
				
			||||||
 | 
					      data_array.write(idx, Vec.fill(nWays)(code.encode(data)), (0 until nWays).map(repl_way === _))
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    def wordMatch(addr: UInt) = addr.extract(log2Ceil(rowBytes)-1, log2Ceil(wordBits/8)) === i
 | 
				
			||||||
 | 
					    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
 | 
				
			||||||
 | 
					    val dout = data_array.read(s0_raddr, !wen && (s0_valid && wordMatch(s0_vaddr))) holdUnless s1_dout_valid
 | 
				
			||||||
 | 
					    when (wordMatch(io.s1_paddr)) {
 | 
				
			||||||
 | 
					      s1_dout := dout
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
  for ((data_array, i) <- data_arrays zipWithIndex) {
 | 
					  for ((data_array, i) <- data_arrays zipWithIndex) {
 | 
				
			||||||
    val wen = tl_out.d.valid && repl_way === UInt(i)
 | 
					    val wen = tl_out.d.valid && repl_way === UInt(i)
 | 
				
			||||||
    when (wen) {
 | 
					    when (wen) {
 | 
				
			||||||
@@ -129,12 +147,13 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			|||||||
    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
 | 
					    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
 | 
				
			||||||
    s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) holdUnless s1_dout_valid
 | 
					    s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) holdUnless s1_dout_valid
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // output signals
 | 
					  // output signals
 | 
				
			||||||
  outer.latency match {
 | 
					  outer.latency match {
 | 
				
			||||||
    case 1 =>
 | 
					    case 1 =>
 | 
				
			||||||
      require(code.width(rowBits) == rowBits) // no ECC
 | 
					      require(code.width(rowBits) == rowBits) // no ECC
 | 
				
			||||||
      io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
 | 
					      io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
 | 
				
			||||||
      io.resp.valid := s1_hit
 | 
					      io.resp.valid := s1_hit
 | 
				
			||||||
    case 2 =>
 | 
					    case 2 =>
 | 
				
			||||||
      val s2_valid = RegEnable(out_valid, Bool(false), !stall)
 | 
					      val s2_valid = RegEnable(out_valid, Bool(false), !stall)
 | 
				
			||||||
@@ -148,7 +167,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 | 
				
			|||||||
      val s2_disparity = s2_tag_disparity || s2_data_disparity
 | 
					      val s2_disparity = s2_tag_disparity || s2_data_disparity
 | 
				
			||||||
      when (s2_valid && s2_disparity) { invalidate := true }
 | 
					      when (s2_valid && s2_disparity) { invalidate := true }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      io.resp.bits.datablock := code.decode(s2_way_mux).uncorrected
 | 
					      io.resp.bits := code.decode(s2_way_mux).uncorrected
 | 
				
			||||||
      io.resp.valid := s2_hit && !s2_disparity
 | 
					      io.resp.valid := s2_hit && !s2_disparity
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  tl_out.a.valid := state === s_request && !io.s2_kill
 | 
					  tl_out.a.valid := state === s_request && !io.s2_kill
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user