1
0

simplify page table walker; speed up emulator

This commit is contained in:
Andrew Waterman 2012-05-01 01:24:36 -07:00
parent c13d3e6f88
commit eafdffe125
5 changed files with 64 additions and 109 deletions

View File

@ -20,14 +20,13 @@ class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component
val w = Vec(nwrite) { new write_port() } val w = Vec(nwrite) { new write_port() }
} }
val busybits = Vec(entries) { Reg(resetVal = Bool(false)) } val busybits = Reg(resetVal = Bits(0, entries))
val wmasks = (0 until nwrite).map(i => Fill(entries, io.w(i).en) & (UFix(1) << io.w(i).addr))
val wdatas = (0 until nwrite).map(i => Mux(io.w(i).data, wmasks(i), UFix(0)))
var next = busybits & ~wmasks.reduceLeft(_|_) | wdatas.reduceLeft(_|_)
busybits := next
for (i <- 0 until nread) for (i <- 0 until nread)
io.r(i).data := busybits(io.r(i).addr) io.r(i).data := busybits(io.r(i).addr)
for (i <- 0 until nwrite) {
when (io.w(i).en) {
busybits(io.w(i).addr) := io.w(i).data
}
}
} }

View File

@ -34,9 +34,9 @@ class rocketDpathALU extends Component
val shright = sra || (io.fn === FN_SR) val shright = sra || (io.fn === FN_SR)
val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32))
val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32)
val shin_r = Cat(shin_hi, io.in1(31,0)) val shin = Cat(shin_hi, io.in1(31,0))
val shin = Mux(shright, shin_r, Reverse(shin_r)) val shout_r = (Cat(sra & shin(63), shin).toFix >> shamt)(63,0)
val shout_r = (Cat(sra & shin_r(63), shin).toFix >> shamt)(63,0) val shout_l = (shin << shamt)(63,0)
val bitwise_logic = val bitwise_logic =
Mux(io.fn === FN_AND, io.in1 & io.in2, Mux(io.fn === FN_AND, io.in1 & io.in2,
@ -48,7 +48,7 @@ class rocketDpathALU extends Component
Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum,
Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less,
Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r,
Mux(io.fn === FN_SL, Reverse(shout_r), Mux(io.fn === FN_SL, shout_l,
bitwise_logic)))) bitwise_logic))))
val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31)))

View File

@ -92,7 +92,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten
Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count),
io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix;
val data_mux = (new Mux1H(assoc)){Bits(width = MEM_DATA_BITS)} val data_mux = (new Mux1H(assoc)){Bits(width = databits)}
var any_hit = Bool(false) var any_hit = Bool(false)
for (i <- 0 until assoc) for (i <- 0 until assoc)
{ {

View File

@ -81,17 +81,23 @@ class rocketPTW extends Component
{ {
val io = new ioPTW(); val io = new ioPTW();
val s_ready :: s_l1_req :: s_l1_wait :: s_l1_fake :: s_l2_req :: s_l2_wait :: s_l2_fake:: s_l3_req :: s_l3_wait :: s_done :: s_error :: Nil = Enum(11) { UFix() }; val levels = 3
val bitsPerLevel = VPN_BITS/levels
require(VPN_BITS == levels * bitsPerLevel)
val count = Reg() { UFix(width = log2up(levels)) }
val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UFix() };
val state = Reg(resetVal = s_ready); val state = Reg(resetVal = s_ready);
val r_req_vpn = Reg() { Bits() } val r_req_vpn = Reg() { Bits() }
val r_req_dest = Reg() { Bits() } val r_req_dest = Reg() { Bits() }
val req_addr = Reg() { UFix() }; val req_addr = Reg() { Bits() }
val r_resp_ppn = Reg() { Bits() }; val r_resp_ppn = Reg() { Bits() };
val r_resp_perm = Reg() { Bits() }; val r_resp_perm = Reg() { Bits() };
val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10)); val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel))
val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j))
val req_val = io.itlb.req_val || io.dtlb.req_val || io.vitlb.req_val val req_val = io.itlb.req_val || io.dtlb.req_val || io.vitlb.req_val
// give ITLB requests priority over DTLB requests // give ITLB requests priority over DTLB requests
@ -102,44 +108,40 @@ class rocketPTW extends Component
when ((state === s_ready) && req_itlb_val) { when ((state === s_ready) && req_itlb_val) {
r_req_vpn := io.itlb.req_vpn; r_req_vpn := io.itlb.req_vpn;
r_req_dest := Bits(0) r_req_dest := Bits(0)
req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
} }
when ((state === s_ready) && req_dtlb_val) { when ((state === s_ready) && req_dtlb_val) {
r_req_vpn := io.dtlb.req_vpn; r_req_vpn := io.dtlb.req_vpn;
r_req_dest := Bits(1) r_req_dest := Bits(1)
req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
} }
when ((state === s_ready) && req_vitlb_val) { when ((state === s_ready) && req_vitlb_val) {
r_req_vpn := io.vitlb.req_vpn; r_req_vpn := io.vitlb.req_vpn;
r_req_dest := Bits(2) r_req_dest := Bits(2)
req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
} }
val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false)) val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false))
when (dmem_resp_val) { when (dmem_resp_val) {
req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
r_resp_perm := io.dmem.resp_data_subword(9,4); r_resp_perm := io.dmem.resp_data_subword(9,4);
r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS); r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS);
} }
io.dmem.req_val := io.dmem.req_val := state === s_req
(state === s_l1_req) ||
(state === s_l2_req) ||
(state === s_l3_req);
io.dmem.req_cmd := M_XRD; io.dmem.req_cmd := M_XRD;
io.dmem.req_type := MT_D; io.dmem.req_type := MT_D;
io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); io.dmem.req_idx := req_addr(PGIDX_BITS-1,0);
io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
io.dmem.req_kill := Bool(false) io.dmem.req_kill := Bool(false)
val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); val resp_val = state === s_done
val resp_err = (state === s_error); val resp_err = state === s_error
val resp_ptd = (io.dmem.resp_data_subword(1,0) === Bits(1,2)); val resp_ptd = io.dmem.resp_data_subword(1,0) === Bits(1)
val resp_pte = (io.dmem.resp_data_subword(1,0) === Bits(2,2)); val resp_pte = io.dmem.resp_data_subword(1,0) === Bits(2)
io.itlb.req_rdy := (state === s_ready) io.itlb.req_rdy := (state === s_ready)
io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val
@ -153,11 +155,9 @@ class rocketPTW extends Component
io.itlb.resp_perm := r_resp_perm io.itlb.resp_perm := r_resp_perm
io.dtlb.resp_perm := r_resp_perm io.dtlb.resp_perm := r_resp_perm
io.vitlb.resp_perm:= r_resp_perm io.vitlb.resp_perm:= r_resp_perm
val resp_ppn = val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0)))
Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-11, 0)), val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j))
Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)),
r_resp_ppn));
io.itlb.resp_ppn := resp_ppn; io.itlb.resp_ppn := resp_ppn;
io.dtlb.resp_ppn := resp_ppn; io.dtlb.resp_ppn := resp_ppn;
@ -167,78 +167,34 @@ class rocketPTW extends Component
switch (state) { switch (state) {
is (s_ready) { is (s_ready) {
when (req_val) { when (req_val) {
state := s_l1_req; state := s_req;
} }
count := UFix(0)
} }
// level 1 is (s_req) {
is (s_l1_req) {
when (io.dmem.req_rdy) { when (io.dmem.req_rdy) {
state := s_l1_wait; state := s_wait;
} }
} }
is (s_l1_wait) { is (s_wait) {
when (io.dmem.resp_nack) { when (io.dmem.resp_nack) {
state := s_l1_req state := s_req
}
when (dmem_resp_val) {
when (resp_ptd) { // page table descriptor
state := s_l2_req;
}
.elsewhen (resp_pte) { // page table entry
state := s_l1_fake;
}
.otherwise {
state := s_error;
}
}
}
is (s_l1_fake) {
state := s_ready;
}
// level 2
is (s_l2_req) {
when (io.dmem.req_rdy) {
state := s_l2_wait;
}
}
is (s_l2_wait) {
when (io.dmem.resp_nack) {
state := s_l2_req
}
when (dmem_resp_val) {
when (resp_ptd) { // page table descriptor
state := s_l3_req;
}
.elsewhen (resp_pte) { // page table entry
state := s_l2_fake;
}
.otherwise {
state := s_error;
}
}
}
is (s_l2_fake) {
state := s_ready;
}
// level 3
is (s_l3_req) {
when (io.dmem.req_rdy) {
state := s_l3_wait;
}
}
is (s_l3_wait) {
when (io.dmem.resp_nack) {
state := s_l3_req
} }
when (dmem_resp_val) { when (dmem_resp_val) {
when (resp_pte) { // page table entry when (resp_pte) { // page table entry
state := s_done; state := s_done
} }
.otherwise { .otherwise {
state := s_error; count := count + UFix(1)
when (resp_ptd && count < UFix(levels-1)) {
state := s_req
}
.otherwise {
state := s_error
}
} }
} }
} }
is (s_done) { is (s_done) {
state := s_ready; state := s_ready;
} }

View File

@ -69,27 +69,27 @@ object PopCount
object Reverse object Reverse
{ {
def apply(in: Bits) = def doit(in: Bits, base: Int, length: Int): Bits =
{ {
var out = in(in.getWidth-1) val half = (1 << log2up(length))/2
for (i <- 1 until in.getWidth) if (length == 1)
out = Cat(in(in.getWidth-i-1), out) in(base)
out else
Cat(doit(in, base, half), doit(in, base+half, length-half))
} }
def apply(in: Bits) = doit(in, 0, in.getWidth)
} }
object OHToUFix object OHToUFix
{ {
def apply(in: Bits): UFix = def apply(in: Seq[Bits]): UFix = {
{ if (in.size <= 1) return UFix(0)
val out = MuxCase( UFix(0), (0 until in.getWidth).map( i => (in(i).toBool, UFix(i)))) if (in.size == 2) return in(1)
out.toUFix val hi = in.slice(in.size/2, in.size)
} val lo = in.slice(0, in.size/2)
def apply(in: Seq[Bool]): UFix = Cat(hi.reduceLeft(_||_), apply(hi zip lo map { case (x, y) => x || y }))
{
val out = MuxCase( UFix(0), in.zipWithIndex map {case (b,i) => (b, UFix(i))})
out.toUFix
} }
def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_)))
} }
object UFixToOH object UFixToOH
@ -119,7 +119,7 @@ object ShiftRegister
object Mux1H object Mux1H
{ {
def buildMux[T <: Data](sel: Bits, in: Vec[T], i: Int, n: Int): T = { def buildMux[T <: Data](sel: Bits, in: Seq[T], i: Int, n: Int): T = {
if (n == 1) if (n == 1)
in(i) in(i)
else else
@ -131,8 +131,8 @@ object Mux1H
} }
} }
def apply [T <: Data](sel: Bits, in: Vec[T]): T = buildMux(sel, in, 0, sel.getWidth) def apply [T <: Data](sel: Bits, in: Seq[T]): T = buildMux(sel, in, 0, sel.getWidth)
def apply [T <: Data](sel: Vec[Bool], in: Vec[T]): T = apply(sel.toBits, in) def apply [T <: Data](sel: Seq[Bool], in: Seq[T]): T = buildMux(Cat(Bits(0),sel.reverse:_*), in, 0, sel.size)
} }
class Mux1H [T <: Data](n: Int)(gen: => T) extends Component class Mux1H [T <: Data](n: Int)(gen: => T) extends Component