simplify page table walker; speed up emulator
This commit is contained in:
		| @@ -20,14 +20,13 @@ class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component | ||||
|     val w = Vec(nwrite) { new write_port() } | ||||
|   } | ||||
|  | ||||
|   val busybits = Vec(entries) { Reg(resetVal = Bool(false)) } | ||||
|   val busybits = Reg(resetVal = Bits(0, entries)) | ||||
|  | ||||
|   val wmasks = (0 until nwrite).map(i => Fill(entries, io.w(i).en) & (UFix(1) << io.w(i).addr)) | ||||
|   val wdatas = (0 until nwrite).map(i => Mux(io.w(i).data, wmasks(i), UFix(0))) | ||||
|   var next = busybits & ~wmasks.reduceLeft(_|_) | wdatas.reduceLeft(_|_) | ||||
|   busybits := next | ||||
|  | ||||
|   for (i <- 0 until nread) | ||||
|     io.r(i).data := busybits(io.r(i).addr) | ||||
|  | ||||
|   for (i <- 0 until nwrite) { | ||||
|     when (io.w(i).en) { | ||||
|       busybits(io.w(i).addr) := io.w(i).data | ||||
|     } | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -34,9 +34,9 @@ class rocketDpathALU extends Component | ||||
|   val shright = sra || (io.fn === FN_SR) | ||||
|   val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) | ||||
|   val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) | ||||
|   val shin_r = Cat(shin_hi, io.in1(31,0)) | ||||
|   val shin = Mux(shright, shin_r, Reverse(shin_r)) | ||||
|   val shout_r = (Cat(sra & shin_r(63), shin).toFix >> shamt)(63,0) | ||||
|   val shin = Cat(shin_hi, io.in1(31,0)) | ||||
|   val shout_r = (Cat(sra & shin(63), shin).toFix >> shamt)(63,0) | ||||
|   val shout_l = (shin << shamt)(63,0) | ||||
|  | ||||
|   val bitwise_logic = | ||||
|     Mux(io.fn === FN_AND, io.in1 & io.in2, | ||||
| @@ -48,7 +48,7 @@ class rocketDpathALU extends Component | ||||
|     Mux(io.fn === FN_ADD || io.fn === FN_SUB,  sum, | ||||
|     Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, | ||||
|     Mux(io.fn === FN_SR  || io.fn === FN_SRA,  shout_r, | ||||
|     Mux(io.fn === FN_SL,                       Reverse(shout_r), | ||||
|     Mux(io.fn === FN_SL,                       shout_l, | ||||
|         bitwise_logic)))) | ||||
|  | ||||
|   val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) | ||||
|   | ||||
| @@ -92,7 +92,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten | ||||
|     Mux((state === s_refill_wait) || (state === s_refill),  Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), | ||||
|       io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; | ||||
|  | ||||
|   val data_mux = (new Mux1H(assoc)){Bits(width = MEM_DATA_BITS)} | ||||
|   val data_mux = (new Mux1H(assoc)){Bits(width = databits)} | ||||
|   var any_hit = Bool(false) | ||||
|   for (i <- 0 until assoc) | ||||
|   { | ||||
|   | ||||
| @@ -81,17 +81,23 @@ class rocketPTW extends Component | ||||
| { | ||||
|   val io = new ioPTW(); | ||||
|    | ||||
|   val s_ready :: s_l1_req :: s_l1_wait :: s_l1_fake :: s_l2_req :: s_l2_wait :: s_l2_fake:: s_l3_req :: s_l3_wait :: s_done :: s_error :: Nil = Enum(11) { UFix() }; | ||||
|   val levels = 3 | ||||
|   val bitsPerLevel = VPN_BITS/levels | ||||
|   require(VPN_BITS == levels * bitsPerLevel) | ||||
|  | ||||
|   val count = Reg() { UFix(width = log2up(levels)) } | ||||
|   val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UFix() }; | ||||
|   val state = Reg(resetVal = s_ready); | ||||
|    | ||||
|   val r_req_vpn = Reg() { Bits() } | ||||
|   val r_req_dest = Reg() { Bits() } | ||||
|    | ||||
|   val req_addr = Reg() { UFix() }; | ||||
|   val req_addr = Reg() { Bits() } | ||||
|   val r_resp_ppn = Reg() { Bits() }; | ||||
|   val r_resp_perm = Reg() { Bits() }; | ||||
|    | ||||
|   val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10));  | ||||
|   val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) | ||||
|   val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) | ||||
|   val req_val = io.itlb.req_val || io.dtlb.req_val || io.vitlb.req_val | ||||
|    | ||||
|   // give ITLB requests priority over DTLB requests | ||||
| @@ -102,44 +108,40 @@ class rocketPTW extends Component | ||||
|   when ((state === s_ready) && req_itlb_val) { | ||||
|     r_req_vpn  := io.itlb.req_vpn; | ||||
|     r_req_dest := Bits(0) | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) | ||||
|   } | ||||
|  | ||||
|   when ((state === s_ready) && req_dtlb_val) { | ||||
|     r_req_vpn  := io.dtlb.req_vpn; | ||||
|     r_req_dest := Bits(1) | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) | ||||
|   } | ||||
|    | ||||
|   when ((state === s_ready) && req_vitlb_val) { | ||||
|     r_req_vpn  := io.vitlb.req_vpn; | ||||
|     r_req_dest := Bits(2) | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) | ||||
|   } | ||||
|  | ||||
|   val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false)) | ||||
|   when (dmem_resp_val) { | ||||
|     req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; | ||||
|     req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) | ||||
|     r_resp_perm := io.dmem.resp_data_subword(9,4); | ||||
|     r_resp_ppn  := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS); | ||||
|   } | ||||
|    | ||||
|   io.dmem.req_val := | ||||
|     (state === s_l1_req) || | ||||
|     (state === s_l2_req) || | ||||
|     (state === s_l3_req); | ||||
|      | ||||
|   io.dmem.req_val := state === s_req | ||||
|   io.dmem.req_cmd  := M_XRD; | ||||
|   io.dmem.req_type := MT_D; | ||||
|   io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); | ||||
|   io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) | ||||
|   io.dmem.req_kill := Bool(false) | ||||
|    | ||||
|   val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); | ||||
|   val resp_err = (state === s_error); | ||||
|   val resp_val = state === s_done | ||||
|   val resp_err = state === s_error | ||||
|    | ||||
|   val resp_ptd = (io.dmem.resp_data_subword(1,0) === Bits(1,2)); | ||||
|   val resp_pte = (io.dmem.resp_data_subword(1,0) === Bits(2,2)); | ||||
|   val resp_ptd = io.dmem.resp_data_subword(1,0) === Bits(1) | ||||
|   val resp_pte = io.dmem.resp_data_subword(1,0) === Bits(2) | ||||
|    | ||||
|   io.itlb.req_rdy   := (state === s_ready) | ||||
|   io.dtlb.req_rdy   := (state === s_ready) && !io.itlb.req_val | ||||
| @@ -153,11 +155,9 @@ class rocketPTW extends Component | ||||
|   io.itlb.resp_perm := r_resp_perm | ||||
|   io.dtlb.resp_perm := r_resp_perm | ||||
|   io.vitlb.resp_perm:= r_resp_perm | ||||
|    | ||||
|   val resp_ppn = | ||||
|     Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7),  r_req_vpn(VPN_BITS-11, 0)), | ||||
|     Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)), | ||||
|       r_resp_ppn)); | ||||
|   | ||||
|   val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) | ||||
|   val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) | ||||
|        | ||||
|   io.itlb.resp_ppn  := resp_ppn; | ||||
|   io.dtlb.resp_ppn  := resp_ppn; | ||||
| @@ -167,78 +167,34 @@ class rocketPTW extends Component | ||||
|   switch (state) { | ||||
|     is (s_ready) { | ||||
|       when (req_val) { | ||||
|         state := s_l1_req; | ||||
|         state := s_req; | ||||
|       } | ||||
|       count := UFix(0) | ||||
|     } | ||||
|     // level 1 | ||||
|     is (s_l1_req) { | ||||
|     is (s_req) { | ||||
|       when (io.dmem.req_rdy) { | ||||
|         state := s_l1_wait; | ||||
|         state := s_wait; | ||||
|       } | ||||
|     } | ||||
|     is (s_l1_wait) { | ||||
|     is (s_wait) { | ||||
|       when (io.dmem.resp_nack) { | ||||
|         state := s_l1_req | ||||
|       } | ||||
|       when (dmem_resp_val) { | ||||
|         when (resp_ptd) { // page table descriptor | ||||
|           state := s_l2_req; | ||||
|         } | ||||
|         .elsewhen (resp_pte) { // page table entry | ||||
|           state := s_l1_fake; | ||||
|         } | ||||
|         .otherwise { | ||||
|           state := s_error; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     is (s_l1_fake) { | ||||
|       state := s_ready; | ||||
|     } | ||||
|     // level 2 | ||||
|     is (s_l2_req) { | ||||
|       when (io.dmem.req_rdy) { | ||||
|         state := s_l2_wait; | ||||
|       } | ||||
|     } | ||||
|     is (s_l2_wait) { | ||||
|       when (io.dmem.resp_nack) { | ||||
|         state := s_l2_req | ||||
|       } | ||||
|       when (dmem_resp_val) { | ||||
|         when (resp_ptd) { // page table descriptor | ||||
|           state := s_l3_req; | ||||
|         } | ||||
|         .elsewhen (resp_pte) { // page table entry | ||||
|           state := s_l2_fake; | ||||
|         } | ||||
|         .otherwise { | ||||
|           state := s_error; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     is (s_l2_fake) { | ||||
|       state := s_ready; | ||||
|     } | ||||
|     // level 3 | ||||
|     is (s_l3_req) { | ||||
|       when (io.dmem.req_rdy) { | ||||
|         state := s_l3_wait; | ||||
|       } | ||||
|     } | ||||
|     is (s_l3_wait) { | ||||
|       when (io.dmem.resp_nack) { | ||||
|         state := s_l3_req | ||||
|         state := s_req | ||||
|       } | ||||
|       when (dmem_resp_val) { | ||||
|         when (resp_pte) { // page table entry | ||||
|           state := s_done; | ||||
|           state := s_done | ||||
|         } | ||||
|         .otherwise { | ||||
|           state := s_error; | ||||
|           count := count + UFix(1) | ||||
|           when (resp_ptd && count < UFix(levels-1)) { | ||||
|             state := s_req | ||||
|           } | ||||
|           .otherwise { | ||||
|             state := s_error | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }   | ||||
|     } | ||||
|     is (s_done) { | ||||
|       state := s_ready; | ||||
|     } | ||||
|   | ||||
| @@ -69,27 +69,27 @@ object PopCount | ||||
|  | ||||
| object Reverse | ||||
| { | ||||
|   def apply(in: Bits) = | ||||
|   def doit(in: Bits, base: Int, length: Int): Bits = | ||||
|   { | ||||
|     var out = in(in.getWidth-1) | ||||
|     for (i <- 1 until in.getWidth) | ||||
|       out = Cat(in(in.getWidth-i-1), out) | ||||
|     out | ||||
|     val half = (1 << log2up(length))/2 | ||||
|     if (length == 1) | ||||
|       in(base) | ||||
|     else | ||||
|       Cat(doit(in, base, half), doit(in, base+half, length-half)) | ||||
|   } | ||||
|   def apply(in: Bits) = doit(in, 0, in.getWidth) | ||||
| } | ||||
|  | ||||
| object OHToUFix | ||||
| { | ||||
|   def apply(in: Bits): UFix =  | ||||
|   { | ||||
|     val out = MuxCase( UFix(0), (0 until in.getWidth).map( i => (in(i).toBool, UFix(i)))) | ||||
|     out.toUFix | ||||
|   } | ||||
|   def apply(in: Seq[Bool]): UFix =  | ||||
|   { | ||||
|     val out = MuxCase( UFix(0), in.zipWithIndex map {case (b,i) => (b, UFix(i))}) | ||||
|     out.toUFix | ||||
|   def apply(in: Seq[Bits]): UFix = { | ||||
|     if (in.size <= 1) return UFix(0) | ||||
|     if (in.size == 2) return in(1) | ||||
|     val hi = in.slice(in.size/2, in.size) | ||||
|     val lo = in.slice(0, in.size/2) | ||||
|     Cat(hi.reduceLeft(_||_), apply(hi zip lo map { case (x, y) => x || y })) | ||||
|   } | ||||
|   def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_))) | ||||
| } | ||||
|  | ||||
| object UFixToOH | ||||
| @@ -119,7 +119,7 @@ object ShiftRegister | ||||
|  | ||||
| object Mux1H  | ||||
| { | ||||
|   def buildMux[T <: Data](sel: Bits, in: Vec[T], i: Int, n: Int): T = { | ||||
|   def buildMux[T <: Data](sel: Bits, in: Seq[T], i: Int, n: Int): T = { | ||||
|     if (n == 1) | ||||
|       in(i) | ||||
|     else | ||||
| @@ -131,8 +131,8 @@ object Mux1H | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   def apply [T <: Data](sel: Bits, in: Vec[T]): T = buildMux(sel, in, 0, sel.getWidth) | ||||
|   def apply [T <: Data](sel: Vec[Bool], in: Vec[T]): T = apply(sel.toBits, in) | ||||
|   def apply [T <: Data](sel: Bits, in: Seq[T]): T = buildMux(sel, in, 0, sel.getWidth) | ||||
|   def apply [T <: Data](sel: Seq[Bool], in: Seq[T]): T = buildMux(Cat(Bits(0),sel.reverse:_*), in, 0, sel.size) | ||||
| } | ||||
|  | ||||
| class Mux1H [T <: Data](n: Int)(gen: => T) extends Component | ||||
|   | ||||
		Reference in New Issue
	
	Block a user