From 0fd777f4807f1bf67d69fcbcac2a03aa14124da4 Mon Sep 17 00:00:00 2001
From: Huy Vo
Date: Sun, 26 Feb 2012 17:24:23 -0800
Subject: [PATCH 01/40] Merge branch 'master' of github.com:ucb-bar/riscv-rocket

From 4f00bcc760ebdd1a41451857f85dc527dbaddf97 Mon Sep 17 00:00:00 2001
From: Yunsup Lee
Date: Wed, 29 Feb 2012 17:12:02 -0800
Subject: [PATCH 02/40] Merge branch 'master' of github.com:ucb-bar/riscv-rocket

From 2607153b6785875ead46253bc513f86578c0fa53 Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Fri, 9 Mar 2012 02:08:55 -0800
Subject: [PATCH 03/40] Merge branch 'master' of github.com:ucb-bar/riscv-rocket

From 1b733e7cf07c9e8abd95358198bf06d7ade92f48 Mon Sep 17 00:00:00 2001
From: Huy Vo
Date: Tue, 13 Mar 2012 12:34:39 -0700
Subject: [PATCH 04/40] Merge branch 'master' of github.com:ucb-bar/riscv-rocket

From 3129040bda033ae34deffc14f7c4f8f3e0b58f8d Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Thu, 15 Mar 2012 18:36:07 -0700
Subject: [PATCH 05/40] use divided clk for htif. UPDATE YOUR FESVR

by default, we now load programs via a backdoor, because otherwise it takes too long to simulate.
---
 uncore/slowio.scala | 49 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 uncore/slowio.scala

diff --git a/uncore/slowio.scala b/uncore/slowio.scala
new file mode 100644
index 00000000..c1535044
--- /dev/null
+++ b/uncore/slowio.scala
@@ -0,0 +1,49 @@
+package rocket
+
+import Chisel._
+import Constants._
+
+class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Component
+{
+  val io = new Bundle {
+    val out_fast = new ioDecoupled()(data).flip
+    val out_slow = new ioDecoupled()(data)
+
+    val in_fast = new ioDecoupled()(data)
+    val in_slow = new ioDecoupled()(data).flip
+
+    val clk_slow = Bool(OUTPUT)
+  }
+
+  require((divisor & (divisor-1)) == 0)
+  require(hold_cycles < divisor/2 && hold_cycles >= 2)
+
+  val cnt = Reg() { UFix(width = log2up(divisor)) }
+  cnt := cnt + UFix(1)
+  val out_en = cnt === UFix(divisor/2+hold_cycles-1) // rising edge + hold time
+  val in_en = cnt === UFix(divisor/2-1) // rising edge
+
+  val in_slow_rdy = Reg(resetVal = Bool(false))
+  val out_slow_val = Reg(resetVal = Bool(false))
+  val out_slow_bits = Reg() { data }
+
+  val fromhost_q = new queue(1)(data)
+  fromhost_q.io.enq.valid := in_en && io.in_slow.valid && in_slow_rdy
+  fromhost_q.io.enq.bits := io.in_slow.bits
+  fromhost_q.io.deq <> io.in_fast
+
+  val tohost_q = new queue(1)(data)
+  tohost_q.io.enq <> io.out_fast
+  tohost_q.io.deq.ready := in_en && io.out_slow.ready && out_slow_val
+
+  when (out_en) {
+    in_slow_rdy := fromhost_q.io.enq.ready
+    out_slow_val := tohost_q.io.deq.valid
+    out_slow_bits := tohost_q.io.deq.bits
+  }
+
+  io.in_slow.ready := in_slow_rdy
+  io.out_slow.valid := out_slow_val
+  io.out_slow.bits := out_slow_bits
+  io.clk_slow := cnt(log2up(divisor)-1).toBool
+}

From a7ebea13fcab7fc79b56ad402655531ed8c4650a Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Sun, 25 Mar 2012 17:03:58 -0700
Subject: [PATCH 06/40] add mem serdes unit

---
 uncore/memserdes.scala | 89 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 uncore/memserdes.scala

diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala
new file mode 100644
index 00000000..2305ed69
--- /dev/null
+++ b/uncore/memserdes.scala
@@ -0,0 +1,89 @@
+package rocket
+
+import Chisel._
+import Node._
+import Constants._
+import scala.math._
+
+class ioMemSerialized(w: Int) extends Bundle
+{
+  val req = (new ioDecoupled) { Bits(width
= w) } + val resp = (new ioPipe) { Bits(width = w) }.flip +} + +class MemSerdes(w: Int) extends Component +{ + val io = new Bundle { + val wide = new ioMem().flip + val narrow = new ioMemSerialized(w) + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + val out_buf = Reg() { Bits() } + val in_buf = Reg() { Bits() } + + val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } + val state = Reg(resetVal = s_idle) + val send_cnt = Reg(resetVal = UFix(0, log2up(max(abits, dbits)))) + val data_send_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) + val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) + + when (state === s_idle) { + when (io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) + } + } + when (state === s_read_addr && adone) { + state := s_idle + send_cnt := UFix(0) + } + when (state === s_write_addr && adone) { + state := s_write_idle + send_cnt := UFix(0) + } + when (state === s_write_idle && io.wide.req_data.valid) { + state := s_write_data + } + when (state === s_write_data && ddone) { + data_send_cnt := data_send_cnt + UFix(1) + state := Mux(data_send_cnt === UFix(REFILL_CYCLES-1), s_idle, s_write_idle) + } + + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := Mux(adone, UFix(0), send_cnt + UFix(1)) + out_buf := out_buf >> UFix(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + val recv_cnt = Reg() { UFix(width = log2up(rbits)) } + val data_recv_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val resp_val = Reg(resetVal = Bool(false)) + + resp_val := Bool(false) + when (io.narrow.resp.valid) { + recv_cnt := recv_cnt + UFix(1) + when (recv_cnt === UFix((rbits-1)/w)) { + recv_cnt := UFix(0) + data_recv_cnt := data_recv_cnt + UFix(1) + resp_val := Bool(true) + } + in_buf := Cat(io.narrow.resp.bits, in_buf(rbits-1,w)) + } + + io.wide.resp.valid := resp_val + io.wide.resp.bits.tag := in_buf(io.wide.resp.bits.tag.width-1,0) + io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) +} From 5a00143035d1053bd30db2daac4460360c470fa1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 21:45:10 -0700 Subject: [PATCH 07/40] loop host.in to host.out during reset --- uncore/slowio.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncore/slowio.scala b/uncore/slowio.scala index c1535044..395bdc8d 100644 --- a/uncore/slowio.scala +++ b/uncore/slowio.scala @@ -28,7 +28,7 @@ class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Comp val out_slow_bits = Reg() { data } val fromhost_q = new queue(1)(data) - fromhost_q.io.enq.valid := in_en && io.in_slow.valid && in_slow_rdy + fromhost_q.io.enq.valid := in_en && (io.in_slow.valid && in_slow_rdy || reset) fromhost_q.io.enq.bits := io.in_slow.bits fromhost_q.io.deq <> io.in_fast @@ -39,7 +39,7 @@ class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Comp when (out_en) { 
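    // out_en fires once per divided-clock period, exactly hold_cycles fast cycles
    // after in_en (which samples at cnt == divisor/2-1, just before clk_slow, the
    // MSB of cnt, rises), so the slow-side outputs only change a hold time after
    // the slow clock edge. For example (illustrative values only): with divisor = 8
    // and hold_cycles = 2, in_en fires at cnt == 3, clk_slow is high for cnt 4..7,
    // and out_en fires at cnt == 5. A hypothetical instantiation would look like
    // new slowIO(8, 2)(Bits(width = 16)). With this patch, the fromhost data below
    // is looped back to the tohost output while reset is asserted.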
in_slow_rdy := fromhost_q.io.enq.ready out_slow_val := tohost_q.io.deq.valid - out_slow_bits := tohost_q.io.deq.bits + out_slow_bits := Mux(reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits) } io.in_slow.ready := in_slow_rdy From 4e6302fedc326e2c95549c10839561e0c5d2759f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 23:03:20 -0700 Subject: [PATCH 08/40] add dessert --- uncore/memserdes.scala | 117 ++++++++++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 24 deletions(-) diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index 2305ed69..f68d36e8 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -27,14 +27,28 @@ class MemSerdes(w: Int) extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) val send_cnt = Reg(resetVal = UFix(0, log2up(max(abits, dbits)))) - val data_send_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) - when (state === s_idle) { - when (io.wide.req_cmd.valid) { - state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) - } + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := send_cnt + UFix(1) + out_buf := out_buf >> UFix(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + when (state === s_idle && io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) } when (state === s_read_addr && adone) { state := s_idle @@ -50,26 +64,11 @@ class MemSerdes(w: Int) extends Component when (state === s_write_data && ddone) { data_send_cnt := data_send_cnt + UFix(1) state := Mux(data_send_cnt === UFix(REFILL_CYCLES-1), s_idle, s_write_idle) + send_cnt := UFix(0) } - when (io.narrow.req.valid && io.narrow.req.ready) { - send_cnt := Mux(adone, UFix(0), send_cnt + UFix(1)) - out_buf := out_buf >> UFix(w) - } - when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { - out_buf := io.wide.req_cmd.bits.toBits - } - when (io.wide.req_data.valid && io.wide.req_data.ready) { - out_buf := io.wide.req_data.bits.toBits - } - - io.wide.req_cmd.ready := state === s_idle - io.wide.req_data.ready := state === s_write_idle - io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data - io.narrow.req.bits := out_buf - - val recv_cnt = Reg() { UFix(width = log2up(rbits)) } - val data_recv_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val resp_val = Reg(resetVal = Bool(false)) resp_val := Bool(false) @@ -80,10 +79,80 @@ class MemSerdes(w: Int) extends Component data_recv_cnt := data_recv_cnt + UFix(1) resp_val := Bool(true) } - in_buf := Cat(io.narrow.resp.bits, in_buf(rbits-1,w)) + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) } io.wide.resp.valid := resp_val io.wide.resp.bits.tag := 
in_buf(io.wide.resp.bits.tag.width-1,0) io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) } + +class MemDessert(w: Int) extends Component // test rig side +{ + val io = new Bundle { + val narrow = new ioMemSerialized(w).flip + val wide = new ioMem + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + require(dbits >= abits && rbits >= dbits) + val recv_cnt = Reg(resetVal = UFix(0, log2up(rbits))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/w) + val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/w) + val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/w) + + val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(5) { UFix() } + val state = Reg(resetVal = s_cmd_recv) + + val in_buf = Reg() { Bits() } + when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { + recv_cnt := recv_cnt + UFix(1) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv + + when (state === s_cmd_recv && adone) { + state := s_cmd + recv_cnt := UFix(0) + } + when (state === s_cmd && io.wide.req_cmd.ready) { + state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply) + } + when (state === s_data_recv && ddone) { + state := s_data + recv_cnt := UFix(0) + } + when (state === s_data && io.wide.req_data.ready) { + state := s_data_recv + when (data_recv_cnt === UFix(REFILL_CYCLES-1)) { + state := s_cmd_recv + } + data_recv_cnt := data_recv_cnt + UFix(1) + } + when (rdone) { // state === s_reply + when (data_recv_cnt === UFix(REFILL_CYCLES-1)) { + state := s_cmd_recv + } + recv_cnt := UFix(0) + data_recv_cnt := data_recv_cnt + UFix(1) + } + + val req_cmd = in_buf >> UFix(((rbits+w-1)/w - (abits+w-1)/w)*w) + io.wide.req_cmd.valid := state === s_cmd + io.wide.req_cmd.bits.tag := req_cmd + io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width) + io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.addr.width) + + io.wide.req_data.valid := state === s_data + io.wide.req_data.bits.data := in_buf >> UFix(((rbits+w-1)/w - (dbits+w-1)/w)*w) + + val dataq = (new queue(REFILL_CYCLES)) { new MemResp } + dataq.io.enq <> io.wide.resp + dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/w) + + io.narrow.resp.valid := dataq.io.deq.valid + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(w)) +} From 25fe46dc18554e2a3bf78d977a943b64258535c9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Mar 2012 14:18:57 -0700 Subject: [PATCH 09/40] remove bug from dessert --- uncore/memserdes.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index f68d36e8..c1d1fccb 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -26,7 +26,7 @@ class MemSerdes(w: Int) extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2up(max(abits, dbits)))) + val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+w-1)/w))) val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) val ddone = io.narrow.req.ready && send_cnt 
=== UFix((dbits-1)/w) @@ -98,7 +98,7 @@ class MemDessert(w: Int) extends Component // test rig side val rbits = io.wide.resp.bits.getWidth require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2up(rbits))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/w) val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/w) From 257747a3a149f3f90d9645ab3a2cd3b5ca10b09e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Mar 2012 23:50:09 -0700 Subject: [PATCH 10/40] no dessert tonight :( --- uncore/memserdes.scala | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index c1d1fccb..27bbaa95 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -5,17 +5,17 @@ import Node._ import Constants._ import scala.math._ -class ioMemSerialized(w: Int) extends Bundle +class ioMemSerialized extends Bundle { - val req = (new ioDecoupled) { Bits(width = w) } - val resp = (new ioPipe) { Bits(width = w) }.flip + val req = (new ioDecoupled) { Bits(width = MEM_BACKUP_WIDTH) } + val resp = (new ioPipe) { Bits(width = MEM_BACKUP_WIDTH) }.flip } -class MemSerdes(w: Int) extends Component +class MemSerdes extends Component { val io = new Bundle { val wide = new ioMem().flip - val narrow = new ioMemSerialized(w) + val narrow = new ioMemSerialized } val abits = io.wide.req_cmd.bits.toBits.getWidth val dbits = io.wide.req_data.bits.toBits.getWidth @@ -26,14 +26,14 @@ class MemSerdes(w: Int) extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+w-1)/w))) + val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) - val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) + val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) + val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) when (io.narrow.req.valid && io.narrow.req.ready) { send_cnt := send_cnt + UFix(1) - out_buf := out_buf >> UFix(w) + out_buf := out_buf >> UFix(MEM_BACKUP_WIDTH) } when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { out_buf := io.wide.req_cmd.bits.toBits @@ -67,19 +67,19 @@ class MemSerdes(w: Int) extends Component send_cnt := UFix(0) } - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val resp_val = Reg(resetVal = Bool(false)) resp_val := Bool(false) when (io.narrow.resp.valid) { recv_cnt := recv_cnt + UFix(1) - when (recv_cnt === UFix((rbits-1)/w)) { + when (recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH)) { recv_cnt := UFix(0) data_recv_cnt := data_recv_cnt + UFix(1) resp_val := Bool(true) } - in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH*MEM_BACKUP_WIDTH-1,MEM_BACKUP_WIDTH)) } io.wide.resp.valid := resp_val @@ -87,10 +87,10 @@ class MemSerdes(w: Int) extends 
Component io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) } -class MemDessert(w: Int) extends Component // test rig side +class MemDessert extends Component // test rig side { val io = new Bundle { - val narrow = new ioMemSerialized(w).flip + val narrow = new ioMemSerialized().flip val wide = new ioMem } val abits = io.wide.req_cmd.bits.toBits.getWidth @@ -98,11 +98,11 @@ class MemDessert(w: Int) extends Component // test rig side val rbits = io.wide.resp.bits.getWidth require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/w) - val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/w) - val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/w) + val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) + val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) + val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_cmd_recv) @@ -110,7 +110,7 @@ class MemDessert(w: Int) extends Component // test rig side val in_buf = Reg() { Bits() } when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { recv_cnt := recv_cnt + UFix(1) - in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH*MEM_BACKUP_WIDTH-1,MEM_BACKUP_WIDTH)) } io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv @@ -140,19 +140,19 @@ class MemDessert(w: Int) extends Component // test rig side data_recv_cnt := data_recv_cnt + UFix(1) } - val req_cmd = in_buf >> UFix(((rbits+w-1)/w - (abits+w-1)/w)*w) + val req_cmd = in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (abits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) io.wide.req_cmd.valid := state === s_cmd io.wide.req_cmd.bits.tag := req_cmd io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width) io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.addr.width) io.wide.req_data.valid := state === s_data - io.wide.req_data.bits.data := in_buf >> UFix(((rbits+w-1)/w - (dbits+w-1)/w)*w) + io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) val dataq = (new queue(REFILL_CYCLES)) { new MemResp } dataq.io.enq <> io.wide.resp - dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/w) + dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) io.narrow.resp.valid := dataq.io.deq.valid - io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(w)) + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(MEM_BACKUP_WIDTH)) } From d301336c3366bc18263ed486d4aa055b85a598ba Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 3 Apr 2012 12:03:05 -0700 Subject: [PATCH 11/40] Refactored coherence better from uncore hub, better coherence function names --- uncore/uncore.scala | 503 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 uncore/uncore.scala diff --git a/uncore/uncore.scala b/uncore/uncore.scala new file 
mode 100644 index 00000000..fa6e2b1e --- /dev/null +++ b/uncore/uncore.scala @@ -0,0 +1,503 @@ +package rocket + +import Chisel._ +import Constants._ + +class MemData extends Bundle { + val data = Bits(width = MEM_DATA_BITS) +} + +class MemReqCmd() extends Bundle +{ + val rw = Bool() + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val tag = Bits(width = MEM_TAG_BITS) +} + +class MemResp () extends MemData +{ + val tag = Bits(width = MEM_TAG_BITS) +} + +class ioMem() extends Bundle +{ + val req_cmd = (new ioDecoupled) { new MemReqCmd() } + val req_data = (new ioDecoupled) { new MemData() } + val resp = (new ioPipe) { new MemResp() }.flip +} + +class TrackerProbeData extends Bundle { + val tile_id = Bits(width = TILE_ID_BITS) +} + +class TrackerAllocReq extends Bundle { + val xact_init = new TransactionInit() + val tile_id = Bits(width = TILE_ID_BITS) +} + +class TrackerDependency extends Bundle { + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) +} + +class ioTileLink extends Bundle { + val xact_init = (new ioDecoupled) { new TransactionInit } + val xact_init_data = (new ioDecoupled) { new TransactionInitData } + val xact_abort = (new ioDecoupled) { new TransactionAbort }.flip + val probe_req = (new ioDecoupled) { new ProbeRequest }.flip + val probe_rep = (new ioDecoupled) { new ProbeReply } + val probe_rep_data = (new ioDecoupled) { new ProbeReplyData } + val xact_rep = (new ioPipe) { new TransactionReply }.flip + val xact_finish = (new ioDecoupled) { new TransactionFinish } +} + +class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { + val io = new Bundle { + val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip + val p_data = (new ioPipe) { new TrackerProbeData }.flip + val can_alloc = Bool(INPUT) + val xact_finish = Bool(INPUT) + val p_rep_cnt_dec = Bits(ntiles, INPUT) + val p_req_cnt_inc = Bits(ntiles, INPUT) + val p_rep_data = (new ioPipe) { new ProbeReplyData }.flip + val x_init_data = (new ioPipe) { new TransactionInitData }.flip + val sent_x_rep_ack = Bool(INPUT) + val p_rep_data_dep = (new ioPipe) { new TrackerDependency }.flip + val x_init_data_dep = (new ioPipe) { new TrackerDependency }.flip + + val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } + val mem_req_data = (new ioDecoupled) { new MemData } + val mem_req_lock = Bool(OUTPUT) + val probe_req = (new ioDecoupled) { new ProbeRequest } + val busy = Bool(OUTPUT) + val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) + val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) + val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) + val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) + val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) + val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) + val push_p_req = Bits(ntiles, OUTPUT) + val pop_p_rep = Bits(ntiles, OUTPUT) + val pop_p_rep_data = Bits(ntiles, OUTPUT) + val pop_p_rep_dep = Bits(ntiles, OUTPUT) + val pop_x_init = Bits(ntiles, OUTPUT) + val pop_x_init_data = Bits(ntiles, OUTPUT) + val pop_x_init_dep = Bits(ntiles, OUTPUT) + val send_x_rep_ack = Bool(OUTPUT) + } + + def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { + req_cmd.valid := !cmd_sent && at_front_of_dep_queue + req_cmd.bits.rw := Bool(true) + req_data.valid := data.valid && at_front_of_dep_queue + req_data.bits := data.bits + lock := at_front_of_dep_queue + when(req_cmd.ready && req_cmd.valid) { + cmd_sent := Bool(true) 
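+      // once the write command has been accepted, deassert req_cmd.valid and keep
+      // streaming the remaining data beats; pop_dep releases the per-tile
+      // dependency queue entry after the last beat, when mem_cnt wraps to zero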
+ } + when(req_data.ready && at_front_of_dep_queue) { + pop_data := UFix(1) << tile_id + when (data.valid) { + mem_cnt := mem_cnt_next + when(mem_cnt_next === UFix(0)) { + pop_dep := UFix(1) << tile_id + trigger := Bool(false) + } + } + } + } + + def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { + req_cmd.valid := Bool(true) + req_cmd.bits.rw := Bool(false) + when(req_cmd.ready) { + trigger := Bool(false) + } + } + + val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } + val state = Reg(resetVal = s_idle) + val addr_ = Reg{ UFix() } + val t_type_ = Reg{ Bits() } + val init_tile_id_ = Reg{ Bits() } + val tile_xact_id_ = Reg{ Bits() } + val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) + val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) + val p_rep_tile_id_ = Reg{ Bits() } + val x_needs_read = Reg(resetVal = Bool(false)) + val x_init_data_needs_write = Reg(resetVal = Bool(false)) + val p_rep_data_needs_write = Reg(resetVal = Bool(false)) + val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val mem_cnt_next = mem_cnt + UFix(1) + val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) + + io.busy := state != s_idle + io.addr := addr_ + io.init_tile_id := init_tile_id_ + io.p_rep_tile_id := p_rep_tile_id_ + io.tile_xact_id := tile_xact_id_ + io.sharer_count := UFix(ntiles) // TODO: Broadcast only + io.t_type := t_type_ + + io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.bits.rw := Bool(false) + io.mem_req_cmd.bits.addr := addr_ + io.mem_req_cmd.bits.tag := UFix(id) + io.mem_req_data.valid := Bool(false) + io.mem_req_data.bits.data := UFix(0) + io.mem_req_lock := Bool(false) + io.probe_req.valid := Bool(false) + io.probe_req.bits.p_type := getProbeRequestType(t_type_, UFix(0)) + io.probe_req.bits.global_xact_id := UFix(id) + io.probe_req.bits.address := addr_ + io.push_p_req := Bits(0, width = ntiles) + io.pop_p_rep := Bits(0, width = ntiles) + io.pop_p_rep_data := Bits(0, width = ntiles) + io.pop_p_rep_dep := Bits(0, width = ntiles) + io.pop_x_init := Bits(0, width = ntiles) + io.pop_x_init_data := Bits(0, width = ntiles) + io.pop_x_init_dep := Bits(0, width = ntiles) + io.send_x_rep_ack := Bool(false) + + switch (state) { + is(s_idle) { + when( io.alloc_req.valid && io.can_alloc ) { + addr_ := io.alloc_req.bits.xact_init.address + t_type_ := io.alloc_req.bits.xact_init.t_type + init_tile_id_ := io.alloc_req.bits.tile_id + tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id + x_init_data_needs_write := hasData(io.alloc_req.bits.xact_init) + x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) + if(ntiles > 1) p_rep_count := UFix(ntiles-1) + val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only + p_req_flags := p_req_initial_flags + mem_cnt := UFix(0) + p_w_mem_cmd_sent := Bool(false) + x_w_mem_cmd_sent := Bool(false) + io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id + state := Mux(p_req_initial_flags.orR, s_probe, s_mem) + } + } + is(s_probe) { + when(p_req_flags.orR) { + io.push_p_req := p_req_flags + io.probe_req.valid := Bool(true) + } + when(io.p_req_cnt_inc.orR) { + p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs + } + when(io.p_rep_cnt_dec.orR) { + val dec = PopCount(io.p_rep_cnt_dec) + io.pop_p_rep := io.p_rep_cnt_dec + if(ntiles > 1) p_rep_count := p_rep_count - dec + 
when(p_rep_count === dec) { + state := s_mem + } + } + when(io.p_data.valid) { + p_rep_data_needs_write := Bool(true) + p_rep_tile_id_ := io.p_data.bits.tile_id + } + } + is(s_mem) { + when (p_rep_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.p_rep_data, + p_rep_data_needs_write, + p_w_mem_cmd_sent, + io.pop_p_rep_data, + io.pop_p_rep_dep, + io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)), + p_rep_tile_id_) + } . elsewhen(x_init_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.x_init_data, + x_init_data_needs_write, + x_w_mem_cmd_sent, + io.pop_x_init_data, + io.pop_x_init_dep, + io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)), + init_tile_id_) + } . elsewhen (x_needs_read) { + doMemReqRead(io.mem_req_cmd, x_needs_read) + } . otherwise { + state := Mux(needsAckReply(t_type_, UFix(0)), s_ack, s_busy) + } + } + is(s_ack) { + io.send_x_rep_ack := Bool(true) + when(io.sent_x_rep_ack) { state := s_busy } + } + is(s_busy) { // Nothing left to do but wait for transaction to complete + when (io.xact_finish) { + state := s_idle + } + } + } +} + +abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy { + val io = new Bundle { + val tiles = Vec(ntiles) { new ioTileLink() }.flip + val mem = new ioMem + } +} + +class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence +{ + val x_init = io.tiles(0).xact_init + val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED + x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp + io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) + io.mem.req_cmd.bits.rw := is_write + io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id + io.mem.req_cmd.bits.addr := x_init.bits.address + io.mem.req_data <> io.tiles(0).xact_init_data + + val x_rep = io.tiles(0).xact_rep + x_rep.bits.t_type := Mux(io.mem.resp.valid, X_REP_READ_EXCLUSIVE, X_REP_WRITE_UNCACHED) + x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) + x_rep.bits.global_xact_id := UFix(0) // don't care + x_rep.bits.data := io.mem.resp.bits.data + x_rep.bits.require_ack := Bool(true) + x_rep.valid := io.mem.resp.valid || x_init.valid && is_write && io.mem.req_cmd.ready + + io.tiles(0).xact_abort.valid := Bool(false) + io.tiles(0).xact_finish.ready := Bool(true) + io.tiles(0).probe_req.valid := Bool(false) + io.tiles(0).probe_rep.ready := Bool(true) + io.tiles(0).probe_rep_data.ready := Bool(true) +} + + +class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourStateCoherence +{ + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _)) + + val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } + val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } + val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + + val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } + val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ 
Vec(ntiles){ Wire(){Bool()} } } + val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } + val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + busy_arr(i) := t.busy + addr_arr(i) := t.addr + init_tile_id_arr(i) := t.init_tile_id + tile_xact_id_arr(i) := t.tile_xact_id + t_type_arr(i) := t.t_type + sh_count_arr(i) := t.sharer_count + send_x_rep_ack_arr(i) := t.send_x_rep_ack + t.xact_finish := do_free_arr(i) + t.p_data.bits.tile_id := p_data_tile_id_arr(i) + t.p_data.valid := p_data_valid_arr(i) + t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits + t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits + t.sent_x_rep_ack := sent_x_rep_ack_arr(i) + do_free_arr(i) := Bool(false) + sent_x_rep_ack_arr(i) := Bool(false) + p_data_tile_id_arr(i) := Bits(0, width = TILE_ID_BITS) + p_data_valid_arr(i) := Bool(false) + for( j <- 0 until ntiles) { + p_rep_cnt_dec_arr(i)(j) := Bool(false) + p_req_cnt_inc_arr(i)(j) := Bool(false) + } + } + + val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY + val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY + + // Free finished transactions + for( j <- 0 until ntiles ) { + val finish = io.tiles(j).xact_finish + when (finish.valid) { + do_free_arr(finish.bits.global_xact_id) := Bool(true) + } + finish.ready := Bool(true) + } + + // Reply to initial requestor + // Forward memory responses from mem to tile or arbitrate to ack + val mem_idx = io.mem.resp.bits.tag + val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) + for( j <- 0 until ntiles ) { + val rep = io.tiles(j).xact_rep + rep.bits.t_type := UFix(0) + rep.bits.tile_xact_id := UFix(0) + rep.bits.global_xact_id := UFix(0) + rep.bits.data := io.mem.resp.bits.data + rep.bits.require_ack := Bool(true) + rep.valid := Bool(false) + when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { + rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) + rep.bits.global_xact_id := mem_idx + rep.valid := Bool(true) + } . otherwise { + rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) + rep.bits.global_xact_id := ack_idx + when (UFix(j) === init_tile_id_arr(ack_idx)) { + rep.valid := send_x_rep_ack_arr.toBits.orR + sent_x_rep_ack_arr(ack_idx) := Bool(true) + } + } + } + // If there were a ready signal due to e.g. 
intervening network use: + //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready + + // Create an arbiter for the one memory port + // We have to arbitrate between the different trackers' memory requests + // and once we have picked a request, get the right write data + val mem_req_cmd_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemReqCmd() } + val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } + for( i <- 0 until NGLOBAL_XACTS ) { + mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd + mem_req_cmd_arb.io.lock(i) <> trackerList(i).io.mem_req_lock + mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data + mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock + } + io.mem.req_cmd <> mem_req_cmd_arb.io.out + io.mem.req_data <> mem_req_data_arb.io.out + + // Handle probe replies, which may or may not have data + for( j <- 0 until ntiles ) { + val p_rep = io.tiles(j).probe_rep + val p_rep_data = io.tiles(j).probe_rep_data + val idx = p_rep.bits.global_xact_id + val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) + val do_pop = foldR(pop_p_reps)(_ || _) + p_rep.ready := Bool(true) + p_rep_data_dep_list(j).io.enq.valid := do_pop + p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) + p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) + when (p_rep.valid && co.messageHasData(p_rep.bits)) { + p_data_valid_arr(idx) := Bool(true) + p_data_tile_id_arr(idx) := UFix(j) + } + p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) + } + for( i <- 0 until NGLOBAL_XACTS ) { + trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid + trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits + + trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) + trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) + + for( j <- 0 until ntiles) { + val p_rep = io.tiles(j).probe_rep + p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) + } + } + + // Nack conflicting transaction init attempts + val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } + val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } + val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } + for( j <- 0 until ntiles ) { + val x_init = io.tiles(j).xact_init + val x_init_data = io.tiles(j).xact_init_data + val x_abort = io.tiles(j).xact_abort + val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + conflicts(i) := t.busy && x_init.valid && isCoherenceConflict(t.addr, x_init.bits.address) + } + x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id + want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && hasData(x_init.bits))) + + x_abort.valid := Bool(false) + switch(abort_state_arr(j)) { + is(s_idle) { + when(want_to_abort_arr(j)) { + when(hasData(x_init.bits)) { + abort_state_arr(j) := s_abort_drain + } . 
otherwise { + abort_state_arr(j) := s_abort_send + } + } + } + is(s_abort_drain) { // raises x_init_data.ready below + when(x_init_data.valid) { + abort_cnt := abort_cnt + UFix(1) + when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { + abort_state_arr(j) := s_abort_send + } + } + } + is(s_abort_send) { // nothing is dequeued for now + x_abort.valid := Bool(true) + when(x_abort.ready) { + abort_state_arr(j) := s_abort_complete + } + } + is(s_abort_complete) { // raises x_init.ready below + abort_state_arr(j) := s_idle + } + } + } + + // Handle transaction initiation requests + // Only one allocation per cycle + // Init requests may or may not have data + val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } + val init_arb = (new Arbiter(ntiles)) { new TrackerAllocReq() } + for( i <- 0 until NGLOBAL_XACTS ) { + alloc_arb.io.in(i).valid := !trackerList(i).io.busy + trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready + trackerList(i).io.alloc_req.bits := init_arb.io.out.bits + trackerList(i).io.alloc_req.valid := init_arb.io.out.valid + + trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits + trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid + trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) + trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) + } + for( j <- 0 until ntiles ) { + val x_init = io.tiles(j).xact_init + val x_init_data = io.tiles(j).xact_init_data + val x_init_data_dep = x_init_data_dep_list(j).io.deq + init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid + init_arb.io.in(j).bits.xact_init := x_init.bits + init_arb.io.in(j).bits.tile_id := UFix(j) + val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) + val do_pop = foldR(pop_x_inits)(_||_) + x_init_data_dep_list(j).io.enq.valid := do_pop && hasData(x_init.bits) && (abort_state_arr(j) === s_idle) + x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) + x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop + x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) + x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) + } + + alloc_arb.io.out.ready := init_arb.io.out.valid + + // Handle probe request generation + // Must arbitrate for each request port + val p_req_arb_arr = List.fill(ntiles)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) + for( j <- 0 until ntiles ) { + for( i <- 0 until NGLOBAL_XACTS ) { + val t = trackerList(i).io + p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits + p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) + p_req_cnt_inc_arr(i)(j) := p_req_arb_arr(j).io.in(i).ready + } + p_req_arb_arr(j).io.out <> io.tiles(j).probe_req + } + +} From f7307ee41164fe47fee1a1730c23f8c2929f0c0f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 3 Apr 2012 18:06:02 -0700 Subject: [PATCH 12/40] changed coherence message type names --- uncore/uncore.scala | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 
fa6e2b1e..8dc88d44 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -74,7 +74,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) + val x_type = Bits(X_INIT_TYPE_BITS, OUTPUT) val push_p_req = Bits(ntiles, OUTPUT) val pop_p_rep = Bits(ntiles, OUTPUT) val pop_p_rep_data = Bits(ntiles, OUTPUT) @@ -117,7 +117,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } val state = Reg(resetVal = s_idle) val addr_ = Reg{ UFix() } - val t_type_ = Reg{ Bits() } + val x_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) @@ -138,7 +138,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.p_rep_tile_id := p_rep_tile_id_ io.tile_xact_id := tile_xact_id_ io.sharer_count := UFix(ntiles) // TODO: Broadcast only - io.t_type := t_type_ + io.x_type := x_type_ io.mem_req_cmd.valid := Bool(false) io.mem_req_cmd.bits.rw := Bool(false) @@ -148,7 +148,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.mem_req_data.bits.data := UFix(0) io.mem_req_lock := Bool(false) io.probe_req.valid := Bool(false) - io.probe_req.bits.p_type := getProbeRequestType(t_type_, UFix(0)) + io.probe_req.bits.p_type := getProbeRequestType(x_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ io.push_p_req := Bits(0, width = ntiles) @@ -164,11 +164,11 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc is(s_idle) { when( io.alloc_req.valid && io.can_alloc ) { addr_ := io.alloc_req.bits.xact_init.address - t_type_ := io.alloc_req.bits.xact_init.t_type + x_type_ := io.alloc_req.bits.xact_init.x_type init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := hasData(io.alloc_req.bits.xact_init) - x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) + x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) if(ntiles > 1) p_rep_count := UFix(ntiles-1) val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only p_req_flags := p_req_initial_flags @@ -226,7 +226,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . 
otherwise { - state := Mux(needsAckReply(t_type_, UFix(0)), s_ack, s_busy) + state := Mux(needsAckReply(x_type_, UFix(0)), s_ack, s_busy) } } is(s_ack) { @@ -251,7 +251,7 @@ abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED + val is_write = x_init.bits.x_type === xactInitWriteUncached x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -260,7 +260,7 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.t_type := Mux(io.mem.resp.valid, X_REP_READ_EXCLUSIVE, X_REP_WRITE_UNCACHED) + x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyReadExclusive, xactReplyWriteUncached) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data @@ -283,7 +283,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } + val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } @@ -300,7 +300,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS addr_arr(i) := t.addr init_tile_id_arr(i) := t.init_tile_id tile_xact_id_arr(i) := t.tile_xact_id - t_type_arr(i) := t.t_type + x_type_arr(i) := t.x_type sh_count_arr(i) := t.sharer_count send_x_rep_ack_arr(i) := t.send_x_rep_ack t.xact_finish := do_free_arr(i) @@ -337,19 +337,19 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) for( j <- 0 until ntiles ) { val rep = io.tiles(j).xact_rep - rep.bits.t_type := UFix(0) + rep.bits.x_type := UFix(0) rep.bits.tile_xact_id := UFix(0) rep.bits.global_xact_id := UFix(0) rep.bits.data := io.mem.resp.bits.data rep.bits.require_ack := Bool(true) rep.valid := Bool(false) when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { - rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.x_type := getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) rep.bits.global_xact_id := mem_idx rep.valid := Bool(true) } . 
otherwise { - rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.x_type := getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx when (UFix(j) === init_tile_id_arr(ack_idx)) { From a68f5e016d5a9567095750909582e5b5e2a98153 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 4 Apr 2012 13:57:08 -0700 Subject: [PATCH 13/40] changed coherence type width names to represent max sizes for all protocols --- uncore/uncore.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 8dc88d44..b907c51d 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -74,7 +74,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val x_type = Bits(X_INIT_TYPE_BITS, OUTPUT) + val x_type = Bits(X_INIT_TYPE_MAX_BITS, OUTPUT) val push_p_req = Bits(ntiles, OUTPUT) val pop_p_rep = Bits(ntiles, OUTPUT) val pop_p_rep_data = Bits(ntiles, OUTPUT) @@ -283,7 +283,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } + val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_MAX_BITS)} } val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } From 5acf1d982014bab2c571faf20317ea92ca8f96c7 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 4 Apr 2012 15:51:33 -0700 Subject: [PATCH 14/40] defined abstract coherence traits in base trait, added Incoherent trait, cleaned up incoherent policy --- uncore/uncore.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index b907c51d..14164f7f 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -251,7 +251,7 @@ abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.x_type === xactInitWriteUncached + val is_write = x_init.bits.x_type === xactInitWriteback x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -260,7 +260,7 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyReadExclusive, xactReplyWriteUncached) + x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyData, xactReplyAck) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data From 1920c97066f6d1a22f6389621a559243fdfc2ab9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 10 Apr 2012 00:09:58 -0700 
Subject: [PATCH 15/40] Refactored coherence as member rather than trait. MI and MEI protocols. --- uncore/uncore.scala | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 14164f7f..a8fc9432 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -50,7 +50,7 @@ class ioTileLink extends Bundle { val xact_finish = (new ioDecoupled) { new TransactionFinish } } -class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { +class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip val p_data = (new ioPipe) { new TrackerProbeData }.flip @@ -140,7 +140,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.sharer_count := UFix(ntiles) // TODO: Broadcast only io.x_type := x_type_ - io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.valid := Bool(false) io.mem_req_cmd.bits.rw := Bool(false) io.mem_req_cmd.bits.addr := addr_ io.mem_req_cmd.bits.tag := UFix(id) @@ -148,7 +148,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.mem_req_data.bits.data := UFix(0) io.mem_req_lock := Bool(false) io.probe_req.valid := Bool(false) - io.probe_req.bits.p_type := getProbeRequestType(x_type_, UFix(0)) + io.probe_req.bits.p_type := co.getProbeRequestType(x_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ io.push_p_req := Bits(0, width = ntiles) @@ -167,8 +167,8 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc x_type_ := io.alloc_req.bits.xact_init.x_type init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id - x_init_data_needs_write := hasData(io.alloc_req.bits.xact_init) - x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) + x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) + x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) if(ntiles > 1) p_rep_count := UFix(ntiles-1) val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only p_req_flags := p_req_initial_flags @@ -226,7 +226,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . 
otherwise { - state := Mux(needsAckReply(x_type_, UFix(0)), s_ack, s_busy) + state := Mux(co.needsAckReply(x_type_, UFix(0)), s_ack, s_busy) } } is(s_ack) { @@ -241,17 +241,17 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc } } -abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy { +abstract class CoherenceHub(ntiles: Int, co: CoherencePolicy) extends Component { val io = new Bundle { val tiles = Vec(ntiles) { new ioTileLink() }.flip val mem = new ioMem } } -class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence +class CoherenceHubNull(co: ThreeStateIncoherence) extends CoherenceHub(1, co) { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.x_type === xactInitWriteback + val is_write = x_init.bits.x_type === co.xactInitWriteback x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -260,7 +260,7 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyData, xactReplyAck) + x_rep.bits.x_type := Mux(io.mem.resp.valid, co.xactReplyData, co.xactReplyAck) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data @@ -275,9 +275,9 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence } -class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourStateCoherence +class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceHub(ntiles, co) { - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _)) + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _, co)) val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } @@ -344,12 +344,12 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS rep.bits.require_ack := Bool(true) rep.valid := Bool(false) when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { - rep.bits.x_type := getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.x_type := co.getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) rep.bits.global_xact_id := mem_idx rep.valid := Bool(true) } . 
otherwise { - rep.bits.x_type := getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.x_type := co.getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx when (UFix(j) === init_tile_id_arr(ack_idx)) { @@ -417,16 +417,16 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(i) := t.busy && x_init.valid && isCoherenceConflict(t.addr, x_init.bits.address) + conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.address) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && hasData(x_init.bits))) + want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && co.messageHasData(x_init.bits))) x_abort.valid := Bool(false) switch(abort_state_arr(j)) { is(s_idle) { when(want_to_abort_arr(j)) { - when(hasData(x_init.bits)) { + when(co.messageHasData(x_init.bits)) { abort_state_arr(j) := s_abort_drain } . otherwise { abort_state_arr(j) := s_abort_send @@ -478,7 +478,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS init_arb.io.in(j).bits.tile_id := UFix(j) val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) val do_pop = foldR(pop_x_inits)(_||_) - x_init_data_dep_list(j).io.enq.valid := do_pop && hasData(x_init.bits) && (abort_state_arr(j) === s_idle) + x_init_data_dep_list(j).io.enq.valid := do_pop && co.messageHasData(x_init.bits) && (abort_state_arr(j) === s_idle) x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) From 98a5d682a59c3a183145ce85628f58e11e8febcf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 10 Apr 2012 02:22:45 -0700 Subject: [PATCH 16/40] coherence mostly works now --- uncore/uncore.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index a8fc9432..756c6e7b 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -86,11 +86,11 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { } def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.valid := !cmd_sent && at_front_of_dep_queue + req_cmd.valid := !cmd_sent && data.valid && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) req_data.valid := data.valid && at_front_of_dep_queue req_data.bits := data.bits - lock := at_front_of_dep_queue + lock := data.valid && at_front_of_dep_queue when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) } @@ -383,8 +383,8 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) val do_pop = foldR(pop_p_reps)(_ || _) p_rep.ready := Bool(true) - p_rep_data_dep_list(j).io.enq.valid := do_pop - p_rep_data_dep_list(j).io.enq.bits.global_xact_id := 
OHToUFix(pop_p_reps) + p_rep_data_dep_list(j).io.enq.valid := p_rep.valid && co.messageHasData(p_rep.bits) + p_rep_data_dep_list(j).io.enq.bits.global_xact_id := p_rep.bits.global_xact_id p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) when (p_rep.valid && co.messageHasData(p_rep.bits)) { p_data_valid_arr(idx) := Bool(true) From 37eb1a4ae615a8e0d8f94fdff342c8bc38204ec1 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 18 Apr 2012 16:31:14 -0700 Subject: [PATCH 17/40] Fixed coherence bug: probe counting for single tile --- uncore/uncore.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 756c6e7b..29d0befd 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -169,14 +169,16 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - if(ntiles > 1) p_rep_count := UFix(ntiles-1) val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only - p_req_flags := p_req_initial_flags + p_req_flags := p_req_initial_flags(ntiles-1,0) mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id - state := Mux(p_req_initial_flags.orR, s_probe, s_mem) + if(ntiles > 1) { + p_rep_count := UFix(ntiles-1) + state := Mux(p_req_initial_flags(ntiles-1,0).orR, s_probe, s_mem) + } else state := s_mem } } is(s_probe) { From 00155f4bc48247b661ffd46831d999d8a35f9918 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 24 Apr 2012 15:11:59 -0700 Subject: [PATCH 18/40] Fixed abort bug: removed uneeded state, added mshr guard on xact_abort.valid and xact_init.ready on same cycle --- uncore/uncore.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 29d0befd..0f482113 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -408,7 +408,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } // Nack conflicting transaction init attempts - val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } + val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() } val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } for( j <- 0 until ntiles ) { @@ -445,13 +445,10 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } is(s_abort_send) { // nothing is dequeued for now x_abort.valid := Bool(true) - when(x_abort.ready) { - abort_state_arr(j) := s_abort_complete + when(x_abort.ready) { // raises x_init.ready below + abort_state_arr(j) := s_idle } } - is(s_abort_complete) { // raises x_init.ready below - abort_state_arr(j) := s_idle - } } } @@ -475,6 +472,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_init_data_dep = x_init_data_dep_list(j).io.deq + val x_abort = io.tiles(j).xact_abort init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid init_arb.io.in(j).bits.xact_init := x_init.bits init_arb.io.in(j).bits.tile_id := UFix(j) @@ -482,7 +480,7 @@ class 
CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val do_pop = foldR(pop_x_inits)(_||_) x_init_data_dep_list(j).io.enq.valid := do_pop && co.messageHasData(x_init.bits) && (abort_state_arr(j) === s_idle) x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) - x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop + x_init.ready := (x_abort.valid && x_abort.ready) || do_pop x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) } From f804c57bb02243004a76c6b7d9b9e6dc38454bfb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 3 May 2012 04:21:11 -0700 Subject: [PATCH 19/40] reduce HTIF clock divider for now --- uncore/slowio.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/uncore/slowio.scala b/uncore/slowio.scala index 395bdc8d..e54996ef 100644 --- a/uncore/slowio.scala +++ b/uncore/slowio.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import Constants._ -class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Component +class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) extends Component { val io = new Bundle { val out_fast = new ioDecoupled()(data).flip @@ -15,8 +15,9 @@ class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Comp val clk_slow = Bool(OUTPUT) } + val hold_cycles = if (hold_cycles_in == -1) divisor/4 else hold_cycles_in require((divisor & (divisor-1)) == 0) - require(hold_cycles < divisor/2 && hold_cycles >= 2) + require(hold_cycles < divisor/2 && hold_cycles >= 1) val cnt = Reg() { UFix(width = log2up(divisor)) } cnt := cnt + UFix(1) From 6f2f1ba21c573392a7b21f5afa9c863bb9114abe Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Thu, 24 May 2012 10:33:15 -0700 Subject: [PATCH 20/40] removing wires --- uncore/uncore.scala | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 0f482113..ab56d0f4 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -281,20 +281,20 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH { val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _, co)) - val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } - val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_MAX_BITS)} } - val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val busy_arr = Vec(NGLOBAL_XACTS){ Bool() } + val addr_arr = Vec(NGLOBAL_XACTS){ Bits(width=PADDR_BITS-OFFSET_BITS) } + val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) } + val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_XACT_ID_BITS) } + val x_type_arr = Vec(NGLOBAL_XACTS){ Bits(width=X_INIT_TYPE_MAX_BITS) } + val sh_count_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) } + val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() } - val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } - val p_req_cnt_inc_arr = 
VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } - val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } - val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } - val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + val do_free_arr = Vec(NGLOBAL_XACTS){ Bool() } + val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} } + val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} } + val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() } + val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width = TILE_ID_BITS) } + val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io @@ -410,13 +410,13 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH // Nack conflicting transaction init attempts val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() } val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } - val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } + val want_to_abort_arr = Vec(ntiles) { Bool() } for( j <- 0 until ntiles ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } + val conflicts = Vec(NGLOBAL_XACTS) { Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.address) From 9b3161920f9a2f0b0905f2df4ee9788a413a3d6e Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Wed, 6 Jun 2012 12:47:17 -0700 Subject: [PATCH 21/40] moving util out into Chisel standard library --- uncore/memserdes.scala | 12 ++++++------ uncore/slowio.scala | 4 ++-- uncore/uncore.scala | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index 27bbaa95..8dc6a8ae 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -26,8 +26,8 @@ class MemSerdes extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val send_cnt = Reg(resetVal = UFix(0, log2Up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) + val data_send_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) @@ -67,8 +67,8 @@ class MemSerdes extends Component send_cnt := UFix(0) } - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val recv_cnt = Reg(resetVal = UFix(0, log2Up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val resp_val = Reg(resetVal = Bool(false)) resp_val := Bool(false) @@ -98,8 +98,8 @@ class MemDessert extends Component // test rig side val rbits = io.wide.resp.bits.getWidth require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_recv_cnt = Reg(resetVal = UFix(0, 
log2up(REFILL_CYCLES))) + val recv_cnt = Reg(resetVal = UFix(0, log2Up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) diff --git a/uncore/slowio.scala b/uncore/slowio.scala index e54996ef..0c513849 100644 --- a/uncore/slowio.scala +++ b/uncore/slowio.scala @@ -19,7 +19,7 @@ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) require((divisor & (divisor-1)) == 0) require(hold_cycles < divisor/2 && hold_cycles >= 1) - val cnt = Reg() { UFix(width = log2up(divisor)) } + val cnt = Reg() { UFix(width = log2Up(divisor)) } cnt := cnt + UFix(1) val out_en = cnt === UFix(divisor/2+hold_cycles-1) // rising edge + hold time val in_en = cnt === UFix(divisor/2-1) // rising edge @@ -46,5 +46,5 @@ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) io.in_slow.ready := in_slow_rdy io.out_slow.valid := out_slow_val io.out_slow.bits := out_slow_bits - io.clk_slow := cnt(log2up(divisor)-1).toBool + io.clk_slow := cnt(log2Up(divisor)-1).toBool } diff --git a/uncore/uncore.scala b/uncore/uncore.scala index ab56d0f4..9791ea0f 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -120,7 +120,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val x_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) + val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2Up(ntiles))) val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) val p_rep_tile_id_ = Reg{ Bits() } val x_needs_read = Reg(resetVal = Bool(false)) @@ -128,9 +128,9 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val p_rep_data_needs_write = Reg(resetVal = Bool(false)) val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) val mem_cnt_next = mem_cnt + UFix(1) - val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) + val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) io.busy := state != s_idle io.addr := addr_ @@ -415,7 +415,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort - val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val abort_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) val conflicts = Vec(NGLOBAL_XACTS) { Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io @@ -438,7 +438,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH is(s_abort_drain) { // raises x_init_data.ready below when(x_init_data.valid) { abort_cnt := abort_cnt + UFix(1) - when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { + when(abort_cnt === ~UFix(0, width = log2Up(REFILL_CYCLES))) { abort_state_arr(j) := s_abort_send } } From 166b85705533ef7d6d37ea049617b211087f7b1e Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Wed, 6 Jun 2012 18:22:56 
-0700 Subject: [PATCH 22/40] ioDecoupled -> FIFOIO, ioPipe -> PipeIO --- uncore/memserdes.scala | 4 ++-- uncore/slowio.scala | 8 ++++---- uncore/uncore.scala | 44 +++++++++++++++++++++--------------------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index 8dc6a8ae..e20f1ec3 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -7,8 +7,8 @@ import scala.math._ class ioMemSerialized extends Bundle { - val req = (new ioDecoupled) { Bits(width = MEM_BACKUP_WIDTH) } - val resp = (new ioPipe) { Bits(width = MEM_BACKUP_WIDTH) }.flip + val req = (new FIFOIO) { Bits(width = MEM_BACKUP_WIDTH) } + val resp = (new PipeIO) { Bits(width = MEM_BACKUP_WIDTH) }.flip } class MemSerdes extends Component diff --git a/uncore/slowio.scala b/uncore/slowio.scala index 0c513849..6cf5a3d9 100644 --- a/uncore/slowio.scala +++ b/uncore/slowio.scala @@ -6,11 +6,11 @@ import Constants._ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) extends Component { val io = new Bundle { - val out_fast = new ioDecoupled()(data).flip - val out_slow = new ioDecoupled()(data) + val out_fast = new FIFOIO()(data).flip + val out_slow = new FIFOIO()(data) - val in_fast = new ioDecoupled()(data) - val in_slow = new ioDecoupled()(data).flip + val in_fast = new FIFOIO()(data) + val in_slow = new FIFOIO()(data).flip val clk_slow = Bool(OUTPUT) } diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 9791ea0f..6239981f 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -21,9 +21,9 @@ class MemResp () extends MemData class ioMem() extends Bundle { - val req_cmd = (new ioDecoupled) { new MemReqCmd() } - val req_data = (new ioDecoupled) { new MemData() } - val resp = (new ioPipe) { new MemResp() }.flip + val req_cmd = (new FIFOIO) { new MemReqCmd() } + val req_data = (new FIFOIO) { new MemData() } + val resp = (new PipeIO) { new MemResp() }.flip } class TrackerProbeData extends Bundle { @@ -40,34 +40,34 @@ class TrackerDependency extends Bundle { } class ioTileLink extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit } - val xact_init_data = (new ioDecoupled) { new TransactionInitData } - val xact_abort = (new ioDecoupled) { new TransactionAbort }.flip - val probe_req = (new ioDecoupled) { new ProbeRequest }.flip - val probe_rep = (new ioDecoupled) { new ProbeReply } - val probe_rep_data = (new ioDecoupled) { new ProbeReplyData } - val xact_rep = (new ioPipe) { new TransactionReply }.flip - val xact_finish = (new ioDecoupled) { new TransactionFinish } + val xact_init = (new FIFOIO) { new TransactionInit } + val xact_init_data = (new FIFOIO) { new TransactionInitData } + val xact_abort = (new FIFOIO) { new TransactionAbort }.flip + val probe_req = (new FIFOIO) { new ProbeRequest }.flip + val probe_rep = (new FIFOIO) { new ProbeReply } + val probe_rep_data = (new FIFOIO) { new ProbeReplyData } + val xact_rep = (new PipeIO) { new TransactionReply }.flip + val xact_finish = (new FIFOIO) { new TransactionFinish } } class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val io = new Bundle { - val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip - val p_data = (new ioPipe) { new TrackerProbeData }.flip + val alloc_req = (new FIFOIO) { new TrackerAllocReq }.flip + val p_data = (new PipeIO) { new TrackerProbeData }.flip val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(ntiles, INPUT) val p_req_cnt_inc = Bits(ntiles, INPUT) - val 
p_rep_data = (new ioPipe) { new ProbeReplyData }.flip - val x_init_data = (new ioPipe) { new TransactionInitData }.flip + val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip + val x_init_data = (new PipeIO) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) - val p_rep_data_dep = (new ioPipe) { new TrackerDependency }.flip - val x_init_data_dep = (new ioPipe) { new TrackerDependency }.flip + val p_rep_data_dep = (new PipeIO) { new TrackerDependency }.flip + val x_init_data_dep = (new PipeIO) { new TrackerDependency }.flip - val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } - val mem_req_data = (new ioDecoupled) { new MemData } + val mem_req_cmd = (new FIFOIO) { new MemReqCmd } + val mem_req_data = (new FIFOIO) { new MemData } val mem_req_lock = Bool(OUTPUT) - val probe_req = (new ioDecoupled) { new ProbeRequest } + val probe_req = (new FIFOIO) { new ProbeRequest } val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) @@ -85,7 +85,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val send_x_rep_ack = Bool(OUTPUT) } - def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { + def doMemReqWrite(req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: PipeIO[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { req_cmd.valid := !cmd_sent && data.valid && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) req_data.valid := data.valid && at_front_of_dep_queue @@ -106,7 +106,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { } } - def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { + def doMemReqRead(req_cmd: FIFOIO[MemReqCmd], trigger: Bool) { req_cmd.valid := Bool(true) req_cmd.bits.rw := Bool(false) when(req_cmd.ready) { From 1ebfeeca8a0024e2dc2281dc926d1da6ba439e69 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 10 Jul 2012 05:23:29 -0700 Subject: [PATCH 23/40] add L2$ It still has performance bugs but no correctness bugs AFAIK. 
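The tag and data arrays of the new DRAMSideLLC are both built from BigMem, which tiles a logical memory out of fixed-size leaf SRAM macros: narrow entries are column-muxed into one leaf row, wide entries are split across leaves ganged side by side, and leaf-deep banks are stacked to reach the requested depth. As a rough, standalone illustration of that geometry arithmetic (it simply mirrors the colMux/nWide/nDeep expressions in the BigMem constructor below; the example parameters at the end are made up for illustration, not a configuration used by this repo):

    object BigMemGeometry {
      // colMux: logical entries packed into one leaf row (column muxing)
      // nWide:  leaves ganged side by side to hold one wide entry
      // nDeep:  leaf-deep banks stacked to reach the requested depth
      case class Geometry(colMux: Int, nWide: Int, nDeep: Int)

      def apply(n: Int, dataWidth: Int, leafN: Int, leafWidth: Int): Geometry = {
        val colMux =
          if (2 * dataWidth <= leafWidth && n > leafN)
            1 << math.floor(math.log((leafWidth / dataWidth).toDouble) / math.log(2)).toInt
          else 1
        val nWide = if (dataWidth > leafWidth) 1 + (dataWidth - 1) / leafWidth else 1
        val nDeep = if (n > colMux * leafN) 1 + (n - 1) / (colMux * leafN) else 1
        Geometry(colMux, nWide, nDeep)
      }
    }

    // e.g. 32768 entries of 128-bit data built from 1024x64-bit leaves:
    // BigMemGeometry(32768, 128, 1024, 64) == Geometry(1, 2, 32)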
--- uncore/llc.scala | 392 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 392 insertions(+) create mode 100644 uncore/llc.scala diff --git a/uncore/llc.scala b/uncore/llc.scala new file mode 100644 index 00000000..71dfa536 --- /dev/null +++ b/uncore/llc.scala @@ -0,0 +1,392 @@ +package rocket + +import Chisel._ +import Node._ +import Constants._ + +class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component +{ + val io = new Bundle { + val addr = UFix(log2Up(n), INPUT) + val en = Bool(INPUT) + val rw = Bool(INPUT) + val wdata = gen.asInput + val wmask = gen.asInput + val rdata = gen.asOutput + } + require(readLatency >= 0 && readLatency <= 2) + val data = gen + val colMux = if (2*data.width <= leaf.data.width && n > leaf.n) 1 << math.floor(math.log(leaf.data.width/data.width)/math.log(2)).toInt else 1 + val nWide = if (data.width > leaf.data.width) 1+(data.width-1)/leaf.data.width else 1 + val nDeep = if (n > colMux*leaf.n) 1+(n-1)/(colMux*leaf.n) else 1 + if (nDeep > 1 || colMux > 1) + require(isPow2(n) && isPow2(leaf.n)) + + val idx = io.addr(log2Up(n/nDeep/colMux)-1, 0) + val rdataDeep = Vec(nDeep) { Bits() } + val rdataSel = Vec(nDeep) { Bool() } + val cond = Vec(nDeep) { Bool() } + val ren = Vec(nDeep) { Bool() } + val reg_ren = Vec(nDeep) { Reg() { Bool() } } + val reg2_ren = Vec(nDeep) { Reg() { Bool() } } + val reg_raddr = Vec(nDeep) { Reg() { UFix() } } + val reg2_raddr = Vec(nDeep) { Reg() { UFix() } } + val renOut = Vec(nDeep) { Bool() } + val raddrOut = Vec(nDeep) { UFix() } + val rdata = Vec(nDeep) { Vec(nWide) { Bits() } } + val wdata = io.wdata.toBits + val wmask = io.wmask.toBits + for (i <- 0 until nDeep) { + cond(i) := (if (nDeep == 1) io.en else io.en && UFix(i) === io.addr(log2Up(n)-1, log2Up(n/nDeep))) + ren(i) := cond(i) && !io.rw + reg_ren(i) := ren(i) + reg2_ren(i) := reg_ren(i) + when (ren(i)) { reg_raddr(i) := io.addr } + when (reg_ren(i)) { reg2_raddr(i) := reg_raddr(i) } + renOut(i) := (if (readLatency > 1) reg2_ren(i) else if (readLatency > 0) reg_ren(i) else ren(i)) + raddrOut(i) := (if (readLatency > 1) reg2_raddr(i) else if (readLatency > 0) reg_raddr(i) else io.addr) + + for (j <- 0 until nWide) { + val mem = leaf.clone + var dout: Bits = null + val dout1 = if (readLatency > 0) Reg() { Bits() } else null + + var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) + if (colMux > 1) + wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(io.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) + val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) + when (cond(i)) { + when (io.rw) { mem.write(idx, wdata0, wmask0) } + .otherwise { if (readLatency > 0) dout1 := mem(idx) } + } + + if (readLatency == 0) { + dout = mem(idx) + } else if (readLatency == 1) { + dout = dout1 + } else { + val dout2 = Reg() { Bits() } + when (reg_ren(i)) { dout2 := dout1 } + dout = dout2 + } + + rdata(i)(j) := dout + } + val rdataWide = rdata(i).reduceLeft((x, y) => Cat(y, x)) + + var colMuxOut = rdataWide + if (colMux > 1) { + val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.width*(k+1)-1, gen.width*k))) { Bits() } + colMuxOut = colMuxIn(raddrOut(i)(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) + } + + rdataDeep(i) := colMuxOut + rdataSel(i) := renOut(i) + } + + io.rdata := Mux1H(rdataSel, rdataDeep) +} + +class LLCDataReq(ways: Int) extends MemReqCmd +{ + val way = UFix(width = log2Up(ways)) + val isWriteback = Bool() 
+ + override def clone = new LLCDataReq(ways).asInstanceOf[this.type] +} + +class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component +{ + val io = new Bundle { + val cpu = (new FIFOIO) { new MemReqCmd }.flip + val repl_way = UFix(log2Up(ways), INPUT) + val repl_dirty = Bool(INPUT) + val repl_tag = UFix(PADDR_BITS - OFFSET_BITS - log2Up(sets), INPUT) + val data = (new FIFOIO) { new LLCDataReq(ways) } + val tag = (new PipeIO) { new Bundle { + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val way = UFix(width = log2Up(ways)) + } } + val mem = new ioMem + val mem_resp_set = UFix(log2Up(sets), OUTPUT) + val mem_resp_way = UFix(log2Up(ways), OUTPUT) + } + + class MSHR extends Bundle { + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val way = UFix(width = log2Up(ways)) + val tag = io.cpu.bits.tag.clone + val refilled = Bool() + val refillCount = UFix(width = log2Up(REFILL_CYCLES)) + val requested = Bool() + val old_dirty = Bool() + val old_tag = UFix(width = PADDR_BITS - OFFSET_BITS - log2Up(sets)) + + override def clone = new MSHR().asInstanceOf[this.type] + } + + val valid = Vec(outstanding) { Reg(resetVal = Bool(false)) } + val validBits = valid.toBits + val freeId = PriorityEncoder(~validBits) + val mshr = Vec(outstanding) { Reg() { new MSHR } } + when (io.cpu.valid && io.cpu.ready) { + valid(freeId) := Bool(true) + mshr(freeId).addr := io.cpu.bits.addr + mshr(freeId).tag := io.cpu.bits.tag + mshr(freeId).way := io.repl_way + mshr(freeId).old_dirty := io.repl_dirty + mshr(freeId).old_tag := io.repl_tag + mshr(freeId).requested := Bool(false) + mshr(freeId).refillCount := UFix(0) + mshr(freeId).refilled := Bool(false) + } + + val requests = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && !mshr(i).old_dirty && !mshr(i).requested):_*) + val request = requests.orR + val requestId = PriorityEncoder(requests) + when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { mshr(requestId).requested := Bool(true) } + + val refillId = io.mem.resp.bits.tag(log2Up(outstanding)-1, 0) + val refillCount = mshr(refillId).refillCount + when (io.mem.resp.valid) { + mshr(refillId).refillCount := refillCount + UFix(1) + when (refillCount === UFix(REFILL_CYCLES-1)) { mshr(refillId).refilled := Bool(true) } + } + + val replays = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).refilled):_*) + val replay = replays.orR + val replayId = PriorityEncoder(replays) + when (replay && io.data.ready) { valid(replayId) := Bool(false) } + + val writebacks = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).old_dirty):_*) + val writeback = writebacks.orR + val writebackId = PriorityEncoder(writebacks) + when (writeback && io.data.ready && !replay) { mshr(writebackId).old_dirty := Bool(false) } + + val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) + io.cpu.ready := !conflicts.orR && !validBits.andR + + io.data.valid := replay || writeback + io.data.bits.rw := Bool(false) + io.data.bits.tag := mshr(replayId).tag + io.data.bits.isWriteback := Bool(true) + io.data.bits.addr := Cat(mshr(writebackId).old_tag, mshr(writebackId).addr(log2Up(sets)-1, 0)).toUFix + io.data.bits.way := mshr(writebackId).way + when (replay) { + io.data.bits.isWriteback := Bool(false) + io.data.bits.addr := mshr(replayId).addr + io.data.bits.way := mshr(replayId).way + } + io.tag.valid := replay && io.data.ready + io.tag.bits.addr := io.data.bits.addr + io.tag.bits.way := 
io.data.bits.way + + io.mem.req_cmd.valid := request + io.mem.req_cmd.bits.rw := Bool(false) + io.mem.req_cmd.bits.addr := mshr(requestId).addr + io.mem.req_cmd.bits.tag := requestId + io.mem_resp_set := mshr(refillId).addr + io.mem_resp_way := mshr(refillId).way +} + +class LLCWriteback(requestors: Int) extends Component +{ + val io = new Bundle { + val req = Vec(requestors) { (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) }.flip } + val data = Vec(requestors) { (new FIFOIO) { new MemData }.flip } + val mem = new ioMem + } + + val valid = Reg(resetVal = Bool(false)) + val who = Reg() { UFix() } + val addr = Reg() { UFix() } + val cmd_sent = Reg() { Bool() } + val data_sent = Reg() { Bool() } + val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) + + var anyReq = Bool(false) + for (i <- 0 until requestors) { + io.req(i).ready := !valid && !anyReq + io.data(i).ready := valid && who === UFix(i) && io.mem.req_data.ready + anyReq = anyReq || io.req(i).valid + } + + val nextWho = PriorityEncoder(io.req.map(_.valid)) + when (!valid && io.req.map(_.valid).reduceLeft(_||_)) { + valid := Bool(true) + cmd_sent := Bool(false) + data_sent := Bool(false) + who := nextWho + addr := io.req(nextWho).bits + } + + when (io.mem.req_data.valid && io.mem.req_data.ready) { + count := count + UFix(1) + when (count === UFix(REFILL_CYCLES-1)) { + data_sent := Bool(true) + when (cmd_sent) { valid := Bool(false) } + } + } + when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { cmd_sent := Bool(true) } + when (valid && cmd_sent && data_sent) { valid := Bool(false) } + + io.mem.req_cmd.valid := valid && !cmd_sent + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := Bool(true) + + io.mem.req_data.valid := valid && !data_sent && io.data(who).valid + io.mem.req_data.bits := io.data(who).bits +} + +class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component +{ + val io = new Bundle { + val req = (new FIFOIO) { new LLCDataReq(ways) }.flip + val req_data = (new FIFOIO) { new MemData }.flip + val writeback = (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) } + val writeback_data = (new FIFOIO) { new MemData } + val resp = (new PipeIO) { new MemResp } + val mem_resp = (new PipeIO) { new MemResp }.flip + val mem_resp_set = UFix(log2Up(sets), INPUT) + val mem_resp_way = UFix(log2Up(ways), INPUT) + } + + val data = new BigMem(sets*ways*REFILL_CYCLES, 2, leaf)(Bits(width = MEM_DATA_BITS)) + class QEntry extends MemResp { + val isWriteback = Bool() + override def clone = new QEntry().asInstanceOf[this.type] + } + val q = (new queue(4)) { new QEntry } + val qReady = q.io.count <= UFix(q.entries - 3) + val valid = Reg(resetVal = Bool(false)) + val req = Reg() { io.req.bits.clone } + val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) + val refillCount = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) + + when (data.io.en && !io.mem_resp.valid) { + count := count + UFix(1) + when (valid && count === UFix(REFILL_CYCLES-1)) { valid := Bool(false) } + } + when (io.req.valid && io.req.ready) { valid := Bool(true); req := io.req.bits } + when (io.mem_resp.valid) { refillCount := refillCount + UFix(1) } + + data.io.en := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) + data.io.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix + data.io.rw := io.req.bits.rw + data.io.wdata := io.req_data.bits.data + data.io.wmask := Fix(-1, io.req_data.bits.data.width) + when (valid) { + data.io.en := Mux(req.rw, io.req_data.valid, qReady) + 
data.io.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix + data.io.rw := req.rw + } + when (io.mem_resp.valid) { + data.io.en := Bool(true) + data.io.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix + data.io.rw := Bool(true) + data.io.wdata := io.mem_resp.bits.data + } + + q.io.enq.valid := Reg(Reg(data.io.en && !data.io.rw, resetVal = Bool(false)), resetVal = Bool(false)) + q.io.enq.bits.tag := Reg(Reg(Mux(valid, req.tag, io.req.bits.tag))) + q.io.enq.bits.data := data.io.rdata + q.io.enq.bits.isWriteback := Reg(Reg(Mux(valid, req.isWriteback, io.req.bits.isWriteback))) + + io.req.ready := !valid && Mux(io.req.bits.isWriteback, io.writeback.ready, Bool(true)) + io.req_data.ready := !io.mem_resp.valid && Mux(valid, req.rw, io.req.valid && io.req.bits.rw) + + io.writeback.valid := io.req.valid && io.req.ready && io.req.bits.isWriteback + io.writeback.bits := io.req.bits.addr + + q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, Bool(true)) + io.resp.valid := q.io.deq.valid && !q.io.deq.bits.isWriteback + io.resp.bits := q.io.deq.bits + io.writeback_data.valid := q.io.deq.valid && q.io.deq.bits.isWriteback + io.writeback_data.bits := q.io.deq.bits +} + +class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component +{ + val io = new Bundle { + val cpu = new ioMem().flip + val mem = new ioMem + } + + val tagWidth = PADDR_BITS - OFFSET_BITS - log2Up(sets) + val metaWidth = tagWidth + 2 // valid + dirty + + val memCmdArb = (new Arbiter(2)) { new MemReqCmd } + val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } + val mshr = new LLCMSHRFile(sets, ways, outstanding) + val tags = new BigMem(sets, 2, tagLeaf)(Bits(width = metaWidth*ways)) + val data = new LLCData(sets, ways, dataLeaf) + val writeback = new LLCWriteback(2) + + val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) + val initialize = !initCount(log2Up(sets)) + when (initialize) { initCount := initCount + UFix(1) } + + val stall_s1 = Bool() + val replay_s1 = Reg(resetVal = Bool(false)) + val s1_valid = Reg(io.cpu.req_cmd.valid && !stall_s1 || replay_s1, resetVal = Bool(false)) + replay_s1 := s1_valid && stall_s1 + val s1 = Reg() { new MemReqCmd } + when (io.cpu.req_cmd.valid && io.cpu.req_cmd.ready) { s1 := io.cpu.req_cmd.bits } + + tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || mshr.io.tag.valid + tags.io.addr := Mux(initialize, initCount, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr))(log2Up(sets)-1,0)) + tags.io.rw := initialize || mshr.io.tag.valid + tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(Bool(false), Bool(true), mshr.io.tag.bits.addr(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) + tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(mshr.io.tag.bits.way))) + + val stall_s2 = Bool() + val s2_valid = Reg(resetVal = Bool(false)) + s2_valid := s1_valid && !replay_s1 && !stall_s1 || stall_s2 + val s2 = Reg() { new MemReqCmd } + when (s1_valid && !stall_s1 && !replay_s1) { s2 := s1 } + val s2_tags = Vec(ways) { Bits(width = metaWidth) } + for (i <- 0 until ways) s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) + val s2_hits = s2_tags.map(t => t(tagWidth) && s2.addr(s2.addr.width-1, s2.addr.width-tagWidth) === t(tagWidth-1, 0)) + val s2_hit = s2_hits.reduceLeft(_||_) + stall_s1 := initialize || mshr.io.tag.valid || s2_valid && !s2_hit || stall_s2 + val 
repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) + val repl_tag = s2_tags(repl_way).toUFix + + mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw + mshr.io.cpu.bits := s2 + mshr.io.repl_way := repl_way + mshr.io.repl_dirty := repl_tag(tagWidth).toBool + mshr.io.repl_tag := repl_tag + mshr.io.mem.resp := io.mem.resp + + data.io.req <> dataArb.io.out + data.io.mem_resp := io.mem.resp + data.io.mem_resp_set := mshr.io.mem_resp_set + data.io.mem_resp_way := mshr.io.mem_resp_way + data.io.req_data.bits := io.cpu.req_data.bits + + writeback.io.req(0) <> data.io.writeback + writeback.io.data(0) <> data.io.writeback_data + writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw + writeback.io.req(1).bits := s2.addr + writeback.io.data(1).valid := io.cpu.req_data.valid + writeback.io.data(1).bits := io.cpu.req_data.bits + data.io.req_data.valid := io.cpu.req_data.valid && !writeback.io.data(1).ready + + memCmdArb.io.in(0) <> mshr.io.mem.req_cmd + memCmdArb.io.in(1) <> writeback.io.mem.req_cmd + + dataArb.io.in(0) <> mshr.io.data + dataArb.io.in(1).valid := s2_valid && s2_hit + dataArb.io.in(1).bits := s2 + dataArb.io.in(1).bits.way := OHToUFix(s2_hits) + dataArb.io.in(1).bits.isWriteback := Bool(false) + + stall_s2 := s2_valid && !Mux(s2_hit, dataArb.io.in(1).ready, Mux(s2.rw, writeback.io.req(1).ready, mshr.io.cpu.ready)) + + io.cpu.resp <> data.io.resp + io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 + io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready + io.mem.req_cmd <> memCmdArb.io.out + io.mem.req_data <> writeback.io.mem.req_data +} From 62a3ea411385f65e8984acc14198b143f0606be1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Jul 2012 17:56:39 -0700 Subject: [PATCH 24/40] fix some LLC bugs --- uncore/llc.scala | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 71dfa536..e6f39c4a 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -103,7 +103,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val repl_dirty = Bool(INPUT) val repl_tag = UFix(PADDR_BITS - OFFSET_BITS - log2Up(sets), INPUT) val data = (new FIFOIO) { new LLCDataReq(ways) } - val tag = (new PipeIO) { new Bundle { + val tag = (new FIFOIO) { new Bundle { val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val way = UFix(width = log2Up(ways)) } } @@ -156,7 +156,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val replays = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).refilled):_*) val replay = replays.orR val replayId = PriorityEncoder(replays) - when (replay && io.data.ready) { valid(replayId) := Bool(false) } + when (replay && io.data.ready && io.tag.ready) { valid(replayId) := Bool(false) } val writebacks = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).old_dirty):_*) val writeback = writebacks.orR @@ -166,7 +166,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) io.cpu.ready := !conflicts.orR && !validBits.andR - io.data.valid := replay || writeback + io.data.valid := replay && io.tag.ready || writeback io.data.bits.rw := Bool(false) io.data.bits.tag := mshr(replayId).tag io.data.bits.isWriteback := Bool(true) @@ -318,7 +318,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da 
val memCmdArb = (new Arbiter(2)) { new MemReqCmd } val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } val mshr = new LLCMSHRFile(sets, ways, outstanding) - val tags = new BigMem(sets, 2, tagLeaf)(Bits(width = metaWidth*ways)) + val tags = new BigMem(sets, 1, tagLeaf)(Bits(width = metaWidth*ways)) val data = new LLCData(sets, ways, dataLeaf) val writeback = new LLCWriteback(2) @@ -333,31 +333,38 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val s1 = Reg() { new MemReqCmd } when (io.cpu.req_cmd.valid && io.cpu.req_cmd.ready) { s1 := io.cpu.req_cmd.bits } - tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || mshr.io.tag.valid - tags.io.addr := Mux(initialize, initCount, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr))(log2Up(sets)-1,0)) - tags.io.rw := initialize || mshr.io.tag.valid - tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(Bool(false), Bool(true), mshr.io.tag.bits.addr(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) - tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(mshr.io.tag.bits.way))) - val stall_s2 = Bool() val s2_valid = Reg(resetVal = Bool(false)) s2_valid := s1_valid && !replay_s1 && !stall_s1 || stall_s2 val s2 = Reg() { new MemReqCmd } - when (s1_valid && !stall_s1 && !replay_s1) { s2 := s1 } - val s2_tags = Vec(ways) { Bits(width = metaWidth) } - for (i <- 0 until ways) s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) + val s2_tags = Vec(ways) { Reg() { Bits(width = metaWidth) } } + when (s1_valid && !stall_s1 && !replay_s1) { + s2 := s1 + for (i <- 0 until ways) + s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) + } val s2_hits = s2_tags.map(t => t(tagWidth) && s2.addr(s2.addr.width-1, s2.addr.width-tagWidth) === t(tagWidth-1, 0)) + val s2_hit_way = OHToUFix(s2_hits) val s2_hit = s2_hits.reduceLeft(_||_) - stall_s1 := initialize || mshr.io.tag.valid || s2_valid && !s2_hit || stall_s2 + val s2_hit_dirty = s2_tags(s2_hit_way)(tagWidth+1) val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) val repl_tag = s2_tags(repl_way).toUFix + val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty + stall_s1 := initialize || mshr.io.tag.valid || setDirty || s2_valid && !s2_hit || stall_s2 + + tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid + tags.io.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) + tags.io.rw := initialize || setDirty || mshr.io.tag.valid + tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) + tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw mshr.io.cpu.bits := s2 mshr.io.repl_way := repl_way - mshr.io.repl_dirty := repl_tag(tagWidth).toBool + mshr.io.repl_dirty := repl_tag(tagWidth+1, tagWidth).andR mshr.io.repl_tag := repl_tag mshr.io.mem.resp := io.mem.resp + mshr.io.tag.ready := !setDirty data.io.req <> dataArb.io.out data.io.mem_resp := io.mem.resp @@ -379,7 +386,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da dataArb.io.in(0) <> mshr.io.data 
dataArb.io.in(1).valid := s2_valid && s2_hit dataArb.io.in(1).bits := s2 - dataArb.io.in(1).bits.way := OHToUFix(s2_hits) + dataArb.io.in(1).bits.way := s2_hit_way dataArb.io.in(1).bits.isWriteback := Bool(false) stall_s2 := s2_valid && !Mux(s2_hit, dataArb.io.in(1).ready, Mux(s2.rw, writeback.io.req(1).ready, mshr.io.cpu.ready)) From a79747a06221802ac05c65144d3b8b42d5b7adb2 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Thu, 12 Jul 2012 18:12:49 -0700 Subject: [PATCH 25/40] INPUT/OUTPUT orderring swapped --- uncore/llc.scala | 14 +++++++------- uncore/uncore.scala | 30 +++++++++++++++--------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index e6f39c4a..5f5007d1 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -7,7 +7,7 @@ import Constants._ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component { val io = new Bundle { - val addr = UFix(log2Up(n), INPUT) + val addr = UFix(INPUT, log2Up(n)) val en = Bool(INPUT) val rw = Bool(INPUT) val wdata = gen.asInput @@ -99,17 +99,17 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component { val io = new Bundle { val cpu = (new FIFOIO) { new MemReqCmd }.flip - val repl_way = UFix(log2Up(ways), INPUT) + val repl_way = UFix(INPUT, log2Up(ways)) val repl_dirty = Bool(INPUT) - val repl_tag = UFix(PADDR_BITS - OFFSET_BITS - log2Up(sets), INPUT) + val repl_tag = UFix(INPUT, PADDR_BITS - OFFSET_BITS - log2Up(sets)) val data = (new FIFOIO) { new LLCDataReq(ways) } val tag = (new FIFOIO) { new Bundle { val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val way = UFix(width = log2Up(ways)) } } val mem = new ioMem - val mem_resp_set = UFix(log2Up(sets), OUTPUT) - val mem_resp_way = UFix(log2Up(ways), OUTPUT) + val mem_resp_set = UFix(OUTPUT, log2Up(sets)) + val mem_resp_way = UFix(OUTPUT, log2Up(ways)) } class MSHR extends Bundle { @@ -247,8 +247,8 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component val writeback_data = (new FIFOIO) { new MemData } val resp = (new PipeIO) { new MemResp } val mem_resp = (new PipeIO) { new MemResp }.flip - val mem_resp_set = UFix(log2Up(sets), INPUT) - val mem_resp_way = UFix(log2Up(ways), INPUT) + val mem_resp_set = UFix(INPUT, log2Up(sets)) + val mem_resp_way = UFix(INPUT, log2Up(ways)) } val data = new BigMem(sets*ways*REFILL_CYCLES, 2, leaf)(Bits(width = MEM_DATA_BITS)) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 6239981f..7e34492c 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -56,8 +56,8 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val p_data = (new PipeIO) { new TrackerProbeData }.flip val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) - val p_rep_cnt_dec = Bits(ntiles, INPUT) - val p_req_cnt_inc = Bits(ntiles, INPUT) + val p_rep_cnt_dec = Bits(INPUT, ntiles) + val p_req_cnt_inc = Bits(INPUT, ntiles) val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip val x_init_data = (new PipeIO) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) @@ -69,19 +69,19 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val mem_req_lock = Bool(OUTPUT) val probe_req = (new FIFOIO) { new ProbeRequest } val busy = Bool(OUTPUT) - val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) - val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) - val sharer_count = Bits(TILE_ID_BITS+1, 
OUTPUT) - val x_type = Bits(X_INIT_TYPE_MAX_BITS, OUTPUT) - val push_p_req = Bits(ntiles, OUTPUT) - val pop_p_rep = Bits(ntiles, OUTPUT) - val pop_p_rep_data = Bits(ntiles, OUTPUT) - val pop_p_rep_dep = Bits(ntiles, OUTPUT) - val pop_x_init = Bits(ntiles, OUTPUT) - val pop_x_init_data = Bits(ntiles, OUTPUT) - val pop_x_init_dep = Bits(ntiles, OUTPUT) + val addr = Bits(OUTPUT, PADDR_BITS - OFFSET_BITS) + val init_tile_id = Bits(OUTPUT, TILE_ID_BITS) + val p_rep_tile_id = Bits(OUTPUT, TILE_ID_BITS) + val tile_xact_id = Bits(OUTPUT, TILE_XACT_ID_BITS) + val sharer_count = Bits(OUTPUT, TILE_ID_BITS+1) + val x_type = Bits(OUTPUT, X_INIT_TYPE_MAX_BITS) + val push_p_req = Bits(OUTPUT, ntiles) + val pop_p_rep = Bits(OUTPUT, ntiles) + val pop_p_rep_data = Bits(OUTPUT, ntiles) + val pop_p_rep_dep = Bits(OUTPUT, ntiles) + val pop_x_init = Bits(OUTPUT, ntiles) + val pop_x_init_data = Bits(OUTPUT, ntiles) + val pop_x_init_dep = Bits(OUTPUT, ntiles) val send_x_rep_ack = Bool(OUTPUT) } From d01e70c6720310386c7c3f8708d24db7798da72d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Jul 2012 22:55:00 -0700 Subject: [PATCH 26/40] decouple all interfaces between tile and top also, add an "incoherent" bit to tilelink to indicate no probes needed --- uncore/llc.scala | 10 +++++----- uncore/uncore.scala | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 5f5007d1..1c9d49b8 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -107,7 +107,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val way = UFix(width = log2Up(ways)) } } - val mem = new ioMem + val mem = new ioMemPipe val mem_resp_set = UFix(OUTPUT, log2Up(sets)) val mem_resp_way = UFix(OUTPUT, log2Up(ways)) } @@ -194,7 +194,7 @@ class LLCWriteback(requestors: Int) extends Component val io = new Bundle { val req = Vec(requestors) { (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) }.flip } val data = Vec(requestors) { (new FIFOIO) { new MemData }.flip } - val mem = new ioMem + val mem = new ioMemPipe } val valid = Reg(resetVal = Bool(false)) @@ -245,7 +245,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component val req_data = (new FIFOIO) { new MemData }.flip val writeback = (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) } val writeback_data = (new FIFOIO) { new MemData } - val resp = (new PipeIO) { new MemResp } + val resp = (new FIFOIO) { new MemResp } val mem_resp = (new PipeIO) { new MemResp }.flip val mem_resp_set = UFix(INPUT, log2Up(sets)) val mem_resp_way = UFix(INPUT, log2Up(ways)) @@ -298,7 +298,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback.valid := io.req.valid && io.req.ready && io.req.bits.isWriteback io.writeback.bits := io.req.bits.addr - q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, Bool(true)) + q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, io.resp.ready) io.resp.valid := q.io.deq.valid && !q.io.deq.bits.isWriteback io.resp.bits := q.io.deq.bits io.writeback_data.valid := q.io.deq.valid && q.io.deq.bits.isWriteback @@ -309,7 +309,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da { val io = new Bundle { val cpu = new ioMem().flip - val mem = new ioMem + val mem = new ioMemPipe } val tagWidth = PADDR_BITS - OFFSET_BITS - log2Up(sets) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 7e34492c..ead75252 
100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -20,6 +20,13 @@ class MemResp () extends MemData } class ioMem() extends Bundle +{ + val req_cmd = (new FIFOIO) { new MemReqCmd() } + val req_data = (new FIFOIO) { new MemData() } + val resp = (new FIFOIO) { new MemResp() }.flip +} + +class ioMemPipe() extends Bundle { val req_cmd = (new FIFOIO) { new MemReqCmd() } val req_data = (new FIFOIO) { new MemData() } @@ -46,8 +53,9 @@ class ioTileLink extends Bundle { val probe_req = (new FIFOIO) { new ProbeRequest }.flip val probe_rep = (new FIFOIO) { new ProbeReply } val probe_rep_data = (new FIFOIO) { new ProbeReplyData } - val xact_rep = (new PipeIO) { new TransactionReply }.flip + val xact_rep = (new FIFOIO) { new TransactionReply }.flip val xact_finish = (new FIFOIO) { new TransactionFinish } + val incoherent = Bool(OUTPUT) } class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { @@ -58,6 +66,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(INPUT, ntiles) val p_req_cnt_inc = Bits(INPUT, ntiles) + val tile_incoherent = Bits(INPUT, ntiles) val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip val x_init_data = (new PipeIO) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) @@ -169,7 +178,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only + val p_req_initial_flags = ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id)) //TODO: Broadcast only p_req_flags := p_req_initial_flags(ntiles-1,0) mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) @@ -310,6 +319,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH t.p_data.valid := p_data_valid_arr(i) t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits + t.tile_incoherent := (Vec(io.tiles.map(_.incoherent)) { Bool() }).toBits t.sent_x_rep_ack := sent_x_rep_ack_arr(i) do_free_arr(i) := Bool(false) sent_x_rep_ack_arr(i) := Bool(false) @@ -360,8 +370,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } } } - // If there were a ready signal due to e.g. 
intervening network use: - //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready + io.mem.resp.ready := io.tiles(init_tile_id_arr(mem_idx)).xact_rep.ready // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests From df8aff090672eb388bf165fc85c45e5ebd7fd0fd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 22 Jul 2012 21:05:52 -0700 Subject: [PATCH 27/40] don't dequeue probe queue during reset --- uncore/uncore.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index ead75252..6ea359ce 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -140,6 +140,8 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) val mem_cnt_next = mem_cnt + UFix(1) val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) + val p_req_initial_flags = Bits(width = ntiles) + p_req_initial_flags := ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0))) //TODO: Broadcast only io.busy := state != s_idle io.addr := addr_ @@ -178,15 +180,14 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - val p_req_initial_flags = ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id)) //TODO: Broadcast only - p_req_flags := p_req_initial_flags(ntiles-1,0) + p_req_flags := p_req_initial_flags mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id if(ntiles > 1) { - p_rep_count := UFix(ntiles-1) - state := Mux(p_req_initial_flags(ntiles-1,0).orR, s_probe, s_mem) + p_rep_count := PopCount(p_req_initial_flags) + state := Mux(p_req_initial_flags.orR, s_probe, s_mem) } else state := s_mem } } From 77364057267769be338f82a13e0e2c9a223afbb5 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 23 Jul 2012 20:56:55 -0700 Subject: [PATCH 28/40] fix bug in coherence hub, respect xact_rep.ready --- uncore/uncore.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 6ea359ce..2035453c 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -367,7 +367,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH rep.bits.global_xact_id := ack_idx when (UFix(j) === init_tile_id_arr(ack_idx)) { rep.valid := send_x_rep_ack_arr.toBits.orR - sent_x_rep_ack_arr(ack_idx) := Bool(true) + sent_x_rep_ack_arr(ack_idx) := rep.ready } } } From 1ae3091261f64e1f8e6de7f9e9e2afc4b3c855ce Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jul 2012 00:03:55 -0700 Subject: [PATCH 29/40] memory system bug fixes --- uncore/llc.scala | 4 ++-- uncore/uncore.scala | 33 ++++++++++++++++++--------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 1c9d49b8..d95aeff7 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -378,7 +378,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da writeback.io.req(1).bits := s2.addr writeback.io.data(1).valid := io.cpu.req_data.valid writeback.io.data(1).bits := io.cpu.req_data.bits - data.io.req_data.valid := 
io.cpu.req_data.valid && !writeback.io.data(1).ready + data.io.req_data.valid := io.cpu.req_data.valid && writeback.io.req(1).ready memCmdArb.io.in(0) <> mshr.io.mem.req_cmd memCmdArb.io.in(1) <> writeback.io.mem.req_cmd @@ -393,7 +393,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da io.cpu.resp <> data.io.resp io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 - io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready + io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready && writeback.io.req(1).ready io.mem.req_cmd <> memCmdArb.io.out io.mem.req_data <> writeback.io.mem.req_data } diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 2035453c..5c58f151 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -95,21 +95,25 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { } def doMemReqWrite(req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: PipeIO[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.valid := !cmd_sent && data.valid && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) - req_data.valid := data.valid && at_front_of_dep_queue req_data.bits := data.bits - lock := data.valid && at_front_of_dep_queue when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) } - when(req_data.ready && at_front_of_dep_queue) { - pop_data := UFix(1) << tile_id - when (data.valid) { - mem_cnt := mem_cnt_next - when(mem_cnt_next === UFix(0)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) + when (at_front_of_dep_queue) { + req_cmd.valid := !cmd_sent && req_data.ready + lock := Bool(true) + when (req_cmd.ready || cmd_sent) { + req_data.valid := data.valid + when(req_data.ready) { + pop_data := UFix(1) << tile_id + when (data.valid) { + mem_cnt := mem_cnt_next + when(mem_cnt === UFix(REFILL_CYCLES-1)) { + pop_dep := UFix(1) << tile_id + trigger := Bool(false) + } + } } } } @@ -141,7 +145,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val mem_cnt_next = mem_cnt + UFix(1) val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) val p_req_initial_flags = Bits(width = ntiles) - p_req_initial_flags := ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0))) //TODO: Broadcast only + p_req_initial_flags := (if (ntiles == 1) Bits(0) else ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0)))) //TODO: Broadcast only io.busy := state != s_idle io.addr := addr_ @@ -376,16 +380,15 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests // and once we have picked a request, get the right write data - val mem_req_cmd_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemReqCmd() } + val mem_req_cmd_arb = (new Arbiter(NGLOBAL_XACTS)) { new MemReqCmd() } val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } for( i <- 0 until NGLOBAL_XACTS ) { mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd - mem_req_cmd_arb.io.lock(i) <> trackerList(i).io.mem_req_lock mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock } - io.mem.req_cmd <> mem_req_cmd_arb.io.out - io.mem.req_data <> mem_req_data_arb.io.out + io.mem.req_cmd <> Queue(mem_req_cmd_arb.io.out) + 
io.mem.req_data <> Queue(mem_req_data_arb.io.out) // Handle probe replies, which may or may not have data for( j <- 0 until ntiles ) { From 465f2efca71727aa952bf157f22ca27ba72d457f Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 27 Jul 2012 18:44:17 -0700 Subject: [PATCH 30/40] add reset pin to llc --- uncore/llc.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index d95aeff7..739fd791 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -305,7 +305,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback_data.bits := q.io.deq.bits } -class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component +class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits], resetSignal: Bool = null) extends Component(resetSignal) { val io = new Bundle { val cpu = new ioMem().flip From 4d4e28c1387b309e9247ba3da94a56b7e5da4dcf Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 28 Jul 2012 21:14:33 -0700 Subject: [PATCH 31/40] remove reset pin on llc --- uncore/llc.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 739fd791..d95aeff7 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -305,7 +305,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback_data.bits := q.io.deq.bits } -class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits], resetSignal: Bool = null) extends Component(resetSignal) +class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component { val io = new Bundle { val cpu = new ioMem().flip From 8db233c9b721abf66a47caf2d79c4e5c97c9a8fd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jul 2012 20:12:11 -0700 Subject: [PATCH 32/40] further pipeline the LLC --- uncore/llc.scala | 84 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index d95aeff7..89fc5e2f 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -14,7 +14,6 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex val wmask = gen.asInput val rdata = gen.asOutput } - require(readLatency >= 0 && readLatency <= 2) val data = gen val colMux = if (2*data.width <= leaf.data.width && n > leaf.n) 1 << math.floor(math.log(leaf.data.width/data.width)/math.log(2)).toInt else 1 val nWide = if (data.width > leaf.data.width) 1+(data.width-1)/leaf.data.width else 1 @@ -28,9 +27,6 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex val cond = Vec(nDeep) { Bool() } val ren = Vec(nDeep) { Bool() } val reg_ren = Vec(nDeep) { Reg() { Bool() } } - val reg2_ren = Vec(nDeep) { Reg() { Bool() } } - val reg_raddr = Vec(nDeep) { Reg() { UFix() } } - val reg2_raddr = Vec(nDeep) { Reg() { UFix() } } val renOut = Vec(nDeep) { Bool() } val raddrOut = Vec(nDeep) { UFix() } val rdata = Vec(nDeep) { Vec(nWide) { Bits() } } @@ -40,11 +36,14 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex cond(i) := (if (nDeep == 1) io.en else io.en && UFix(i) === io.addr(log2Up(n)-1, log2Up(n/nDeep))) ren(i) := cond(i) && !io.rw reg_ren(i) := ren(i) - reg2_ren(i) := reg_ren(i) - when (ren(i)) { reg_raddr(i) := io.addr } - when (reg_ren(i)) { reg2_raddr(i) := reg_raddr(i) } - renOut(i) := (if 
(readLatency > 1) reg2_ren(i) else if (readLatency > 0) reg_ren(i) else ren(i)) - raddrOut(i) := (if (readLatency > 1) reg2_raddr(i) else if (readLatency > 0) reg_raddr(i) else io.addr) + + renOut(i) := ren(i) + raddrOut(i) := io.addr + if (readLatency > 0) { + val r = Pipe(ren(i), io.addr, readLatency) + renOut(i) := r.valid + raddrOut(i) := r.bits + } for (j <- 0 until nWide) { val mem = leaf.clone @@ -64,11 +63,8 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex dout = mem(idx) } else if (readLatency == 1) { dout = dout1 - } else { - val dout2 = Reg() { Bits() } - when (reg_ren(i)) { dout2 := dout1 } - dout = dout2 - } + } else + dout = Pipe(reg_ren(i), dout1, readLatency-1).bits rdata(i)(j) := dout } @@ -238,7 +234,7 @@ class LLCWriteback(requestors: Int) extends Component io.mem.req_data.bits := io.data(who).bits } -class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component +class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Component { val io = new Bundle { val req = (new FIFOIO) { new LLCDataReq(ways) }.flip @@ -251,13 +247,13 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component val mem_resp_way = UFix(INPUT, log2Up(ways)) } - val data = new BigMem(sets*ways*REFILL_CYCLES, 2, leaf)(Bits(width = MEM_DATA_BITS)) + val data = new BigMem(sets*ways*REFILL_CYCLES, latency, leaf)(Bits(width = MEM_DATA_BITS)) class QEntry extends MemResp { val isWriteback = Bool() override def clone = new QEntry().asInstanceOf[this.type] } - val q = (new queue(4)) { new QEntry } - val qReady = q.io.count <= UFix(q.entries - 3) + val q = (new queue(latency+2)) { new QEntry } + val qReady = q.io.count <= UFix(q.entries-latency-1) val valid = Reg(resetVal = Bool(false)) val req = Reg() { io.req.bits.clone } val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) @@ -287,10 +283,11 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component data.io.wdata := io.mem_resp.bits.data } - q.io.enq.valid := Reg(Reg(data.io.en && !data.io.rw, resetVal = Bool(false)), resetVal = Bool(false)) - q.io.enq.bits.tag := Reg(Reg(Mux(valid, req.tag, io.req.bits.tag))) + val tagPipe = Pipe(data.io.en && !data.io.rw, Mux(valid, req.tag, io.req.bits.tag), latency) + q.io.enq.valid := tagPipe.valid + q.io.enq.bits.tag := tagPipe.bits + q.io.enq.bits.isWriteback := Pipe(Mux(valid, req.isWriteback, io.req.bits.isWriteback), Bool(false), latency).valid q.io.enq.bits.data := data.io.rdata - q.io.enq.bits.isWriteback := Reg(Reg(Mux(valid, req.isWriteback, io.req.bits.isWriteback))) io.req.ready := !valid && Mux(io.req.bits.isWriteback, io.writeback.ready, Bool(true)) io.req_data.ready := !io.mem_resp.valid && Mux(valid, req.rw, io.req.valid && io.req.bits.rw) @@ -305,6 +302,49 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback_data.bits := q.io.deq.bits } +class MemReqArb(n: Int) extends Component // UNTESTED +{ + val io = new Bundle { + val cpu = Vec(n) { new ioMem().flip } + val mem = new ioMem + } + + val lock = Reg(resetVal = Bool(false)) + val locker = Reg() { UFix() } + + val arb = new RRArbiter(n)(new MemReqCmd) + val respWho = io.mem.resp.bits.tag(log2Up(n)-1,0) + val respTag = io.mem.resp.bits.tag >> UFix(log2Up(n)) + for (i <- 0 until n) { + val me = UFix(i, log2Up(n)) + arb.io.in(i).valid := io.cpu(i).req_cmd.valid + arb.io.in(i).bits := io.cpu(i).req_cmd.bits + arb.io.in(i).bits.tag := Cat(io.cpu(i).req_cmd.bits.tag, me) + io.cpu(i).req_cmd.ready := arb.io.in(i).ready + 
io.cpu(i).req_data.ready := Bool(false) + + val getLock = io.cpu(i).req_cmd.fire() && io.cpu(i).req_cmd.bits.rw && !lock + val haveLock = lock && locker === me + when (getLock) { + lock := Bool(true) + locker := UFix(i) + } + when (getLock || haveLock) { + io.cpu(i).req_data.ready := io.mem.req_data.ready + io.mem.req_data.valid := Bool(true) + io.mem.req_data.bits := io.cpu(i).req_data.bits + } + + io.cpu(i).resp.valid := io.mem.resp.valid && respWho === me + io.cpu(i).resp.bits := io.mem.resp.bits + io.cpu(i).resp.bits.tag := respTag + } + io.mem.resp.ready := io.cpu(respWho).resp.ready + + val unlock = Counter(io.mem.req_data.fire(), REFILL_CYCLES)._2 + when (unlock) { lock := Bool(false) } +} + class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component { val io = new Bundle { @@ -319,7 +359,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } val mshr = new LLCMSHRFile(sets, ways, outstanding) val tags = new BigMem(sets, 1, tagLeaf)(Bits(width = metaWidth*ways)) - val data = new LLCData(sets, ways, dataLeaf) + val data = new LLCData(3, sets, ways, dataLeaf) val writeback = new LLCWriteback(2) val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) From 7a75334bb97d103f7c4ff1c3b936434c5683bebf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 31 Jul 2012 17:44:53 -0700 Subject: [PATCH 33/40] pipeline LLC further --- uncore/llc.scala | 104 ++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 89fc5e2f..546da774 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -4,14 +4,17 @@ import Chisel._ import Node._ import Constants._ -class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component +class BigMem[T <: Data](n: Int, preLatency: Int, postLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component { - val io = new Bundle { + class Inputs extends Bundle { val addr = UFix(INPUT, log2Up(n)) - val en = Bool(INPUT) val rw = Bool(INPUT) val wdata = gen.asInput val wmask = gen.asInput + override def clone = new Inputs().asInstanceOf[this.type] + } + val io = new Bundle { + val in = new PipeIO()(new Inputs).flip val rdata = gen.asOutput } val data = gen @@ -21,63 +24,52 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex if (nDeep > 1 || colMux > 1) require(isPow2(n) && isPow2(leaf.n)) - val idx = io.addr(log2Up(n/nDeep/colMux)-1, 0) val rdataDeep = Vec(nDeep) { Bits() } val rdataSel = Vec(nDeep) { Bool() } - val cond = Vec(nDeep) { Bool() } - val ren = Vec(nDeep) { Bool() } - val reg_ren = Vec(nDeep) { Reg() { Bool() } } - val renOut = Vec(nDeep) { Bool() } - val raddrOut = Vec(nDeep) { UFix() } - val rdata = Vec(nDeep) { Vec(nWide) { Bits() } } - val wdata = io.wdata.toBits - val wmask = io.wmask.toBits for (i <- 0 until nDeep) { - cond(i) := (if (nDeep == 1) io.en else io.en && UFix(i) === io.addr(log2Up(n)-1, log2Up(n/nDeep))) - ren(i) := cond(i) && !io.rw - reg_ren(i) := ren(i) + val in = Pipe(io.in.valid && (if (nDeep == 1) Bool(true) else UFix(i) === io.in.bits.addr(log2Up(n)-1, log2Up(n/nDeep))), io.in.bits, preLatency) + val idx = in.bits.addr(log2Up(n/nDeep/colMux)-1, 0) + val wdata = in.bits.wdata.toBits + val wmask = in.bits.wmask.toBits + val ren = in.valid && !in.bits.rw + val reg_ren = Reg(ren) + val rdata = Vec(nWide) { Bits() } - renOut(i) := ren(i) - 
raddrOut(i) := io.addr - if (readLatency > 0) { - val r = Pipe(ren(i), io.addr, readLatency) - renOut(i) := r.valid - raddrOut(i) := r.bits - } + val r = Pipe(ren, in.bits.addr, postLatency) for (j <- 0 until nWide) { val mem = leaf.clone var dout: Bits = null - val dout1 = if (readLatency > 0) Reg() { Bits() } else null + val dout1 = if (postLatency > 0) Reg() { Bits() } else null var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) if (colMux > 1) - wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(io.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) + wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(in.bits.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) - when (cond(i)) { - when (io.rw) { mem.write(idx, wdata0, wmask0) } - .otherwise { if (readLatency > 0) dout1 := mem(idx) } + when (in.valid) { + when (in.bits.rw) { mem.write(idx, wdata0, wmask0) } + .otherwise { if (postLatency > 0) dout1 := mem(idx) } } - if (readLatency == 0) { + if (postLatency == 0) { dout = mem(idx) - } else if (readLatency == 1) { + } else if (postLatency == 1) { dout = dout1 } else - dout = Pipe(reg_ren(i), dout1, readLatency-1).bits + dout = Pipe(reg_ren, dout1, postLatency-1).bits - rdata(i)(j) := dout + rdata(j) := dout } - val rdataWide = rdata(i).reduceLeft((x, y) => Cat(y, x)) + val rdataWide = rdata.reduceLeft((x, y) => Cat(y, x)) var colMuxOut = rdataWide if (colMux > 1) { val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.width*(k+1)-1, gen.width*k))) { Bits() } - colMuxOut = colMuxIn(raddrOut(i)(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) + colMuxOut = colMuxIn(r.bits(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) } rdataDeep(i) := colMuxOut - rdataSel(i) := renOut(i) + rdataSel(i) := r.valid } io.rdata := Mux1H(rdataSel, rdataDeep) @@ -247,7 +239,7 @@ class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Compo val mem_resp_way = UFix(INPUT, log2Up(ways)) } - val data = new BigMem(sets*ways*REFILL_CYCLES, latency, leaf)(Bits(width = MEM_DATA_BITS)) + val data = new BigMem(sets*ways*REFILL_CYCLES, 1, latency-1, leaf)(Bits(width = MEM_DATA_BITS)) class QEntry extends MemResp { val isWriteback = Bool() override def clone = new QEntry().asInstanceOf[this.type] @@ -259,31 +251,31 @@ class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Compo val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val refillCount = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - when (data.io.en && !io.mem_resp.valid) { + when (data.io.in.valid && !io.mem_resp.valid) { count := count + UFix(1) when (valid && count === UFix(REFILL_CYCLES-1)) { valid := Bool(false) } } when (io.req.valid && io.req.ready) { valid := Bool(true); req := io.req.bits } when (io.mem_resp.valid) { refillCount := refillCount + UFix(1) } - data.io.en := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) - data.io.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix - data.io.rw := io.req.bits.rw - data.io.wdata := io.req_data.bits.data - data.io.wmask := Fix(-1, io.req_data.bits.data.width) + data.io.in.valid := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) + data.io.in.bits.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix + data.io.in.bits.rw := io.req.bits.rw + data.io.in.bits.wdata := 
io.req_data.bits.data + data.io.in.bits.wmask := Fix(-1, io.req_data.bits.data.width) when (valid) { - data.io.en := Mux(req.rw, io.req_data.valid, qReady) - data.io.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix - data.io.rw := req.rw + data.io.in.valid := Mux(req.rw, io.req_data.valid, qReady) + data.io.in.bits.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix + data.io.in.bits.rw := req.rw } when (io.mem_resp.valid) { - data.io.en := Bool(true) - data.io.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix - data.io.rw := Bool(true) - data.io.wdata := io.mem_resp.bits.data + data.io.in.valid := Bool(true) + data.io.in.bits.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix + data.io.in.bits.rw := Bool(true) + data.io.in.bits.wdata := io.mem_resp.bits.data } - val tagPipe = Pipe(data.io.en && !data.io.rw, Mux(valid, req.tag, io.req.bits.tag), latency) + val tagPipe = Pipe(data.io.in.valid && !data.io.in.bits.rw, Mux(valid, req.tag, io.req.bits.tag), latency) q.io.enq.valid := tagPipe.valid q.io.enq.bits.tag := tagPipe.bits q.io.enq.bits.isWriteback := Pipe(Mux(valid, req.isWriteback, io.req.bits.isWriteback), Bool(false), latency).valid @@ -358,8 +350,8 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val memCmdArb = (new Arbiter(2)) { new MemReqCmd } val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } val mshr = new LLCMSHRFile(sets, ways, outstanding) - val tags = new BigMem(sets, 1, tagLeaf)(Bits(width = metaWidth*ways)) - val data = new LLCData(3, sets, ways, dataLeaf) + val tags = new BigMem(sets, 0, 1, tagLeaf)(Bits(width = metaWidth*ways)) + val data = new LLCData(4, sets, ways, dataLeaf) val writeback = new LLCWriteback(2) val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) @@ -392,11 +384,11 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty stall_s1 := initialize || mshr.io.tag.valid || setDirty || s2_valid && !s2_hit || stall_s2 - tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid - tags.io.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) - tags.io.rw := initialize || setDirty || mshr.io.tag.valid - tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) - tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) + tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid + tags.io.in.bits.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) + tags.io.in.bits.rw := initialize || setDirty || mshr.io.tag.valid + tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) + tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw 
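[Editorial sketch, not part of the patch series.] A minimal illustration of the Pipe idiom that the BigMem/LLCData rework in the two patches above leans on, built only from constructs these patches themselves use (Pipe(valid, bits, n) returning a valid/bits pair delayed by n register stages, plus Component, Bundle, Bool, UFix, log2Up); the DelayedRead component and its port names are hypothetical, chosen for illustration:

package rocket

import Chisel._

// Hypothetical example component (illustration only): the same pattern the
// patches use to replace hand-written reg_ren/reg2_ren and reg_raddr/reg2_raddr
// register chains with a single Pipe of configurable depth.
class DelayedRead(n: Int, latency: Int) extends Component
{
  val io = new Bundle {
    val ren       = Bool(INPUT)
    val raddr     = UFix(INPUT, log2Up(n))
    val ren_out   = Bool(OUTPUT)
    val raddr_out = UFix(OUTPUT, log2Up(n))
  }

  // Pipe carries the (valid, bits) pair through `latency` register stages,
  // so the read enable and read address emerge together, aligned with the
  // memory's read latency.
  val r = Pipe(io.ren, io.raddr, latency)
  io.ren_out := r.valid
  io.raddr_out := r.bits
}

The design point this buys in the LLC patches: the read latency becomes a constructor parameter (preLatency/postLatency) rather than a hard-coded count of manually written registers, which is what lets the later commit deepen the pipeline by changing one argument.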
mshr.io.cpu.bits := s2 From e346f21725e3de2ac2939db11e644f29e9e3ac3b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Aug 2012 18:59:37 -0700 Subject: [PATCH 34/40] fix control bug in LLC structural hazard on tag ram caused deadlock --- uncore/llc.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 546da774..eece03ed 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -154,13 +154,14 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) io.cpu.ready := !conflicts.orR && !validBits.andR - io.data.valid := replay && io.tag.ready || writeback + io.data.valid := writeback io.data.bits.rw := Bool(false) io.data.bits.tag := mshr(replayId).tag io.data.bits.isWriteback := Bool(true) io.data.bits.addr := Cat(mshr(writebackId).old_tag, mshr(writebackId).addr(log2Up(sets)-1, 0)).toUFix io.data.bits.way := mshr(writebackId).way when (replay) { + io.data.valid := io.tag.ready io.data.bits.isWriteback := Bool(false) io.data.bits.addr := mshr(replayId).addr io.data.bits.way := mshr(replayId).way From 875f3622af6520e2985312b07aa0d2ca65732403 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Aug 2012 19:00:03 -0700 Subject: [PATCH 35/40] fix deadlock in coherence hub --- uncore/uncore.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 5c58f151..a1726242 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -101,8 +101,8 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { cmd_sent := Bool(true) } when (at_front_of_dep_queue) { - req_cmd.valid := !cmd_sent && req_data.ready - lock := Bool(true) + req_cmd.valid := !cmd_sent && req_data.ready && data.valid + lock := data.valid || cmd_sent when (req_cmd.ready || cmd_sent) { req_data.valid := data.valid when(req_data.ready) { From 17dc2075dd2c73b9446eb669a41d45aefd718ff1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 6 Aug 2012 17:10:04 -0700 Subject: [PATCH 36/40] fix some LLC control bugs --- uncore/llc.scala | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index eece03ed..64d48ce8 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -383,15 +383,20 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) val repl_tag = s2_tags(repl_way).toUFix val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty - stall_s1 := initialize || mshr.io.tag.valid || setDirty || s2_valid && !s2_hit || stall_s2 + stall_s1 := initialize || stall_s2 - tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid - tags.io.in.bits.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) - tags.io.in.bits.rw := initialize || setDirty || mshr.io.tag.valid - tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) - tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, 
s2_hit_way, mshr.io.tag.bits.way)))) + val tag_we = setDirty || mshr.io.tag.valid + val tag_waddr = Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(log2Up(sets)-1,0) + val tag_wdata = Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)) + val tag_wway = Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way) + tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || tag_we + tags.io.in.bits.addr := Mux(initialize, initCount, Mux(tag_we, tag_waddr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)(log2Up(sets)-1,0))) + tags.io.in.bits.rw := initialize || tag_we + tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, tag_wdata)) + tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(tag_wway))) + when (tag_we && Mux(stall_s2, s2.addr, s1.addr)(log2Up(sets)-1,0) === tag_waddr) { s2_tags(tag_wway) := tag_wdata } - mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw + mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw && dataArb.io.in(1).ready && writeback.io.req(0).ready // stall_s2 mshr.io.cpu.bits := s2 mshr.io.repl_way := repl_way mshr.io.repl_dirty := repl_tag(tagWidth+1, tagWidth).andR @@ -404,29 +409,29 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da data.io.mem_resp_set := mshr.io.mem_resp_set data.io.mem_resp_way := mshr.io.mem_resp_way data.io.req_data.bits := io.cpu.req_data.bits + data.io.req_data.valid := io.cpu.req_data.valid writeback.io.req(0) <> data.io.writeback writeback.io.data(0) <> data.io.writeback_data - writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw + writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw && dataArb.io.in(1).ready && mshr.io.cpu.ready // stall_s2 writeback.io.req(1).bits := s2.addr writeback.io.data(1).valid := io.cpu.req_data.valid writeback.io.data(1).bits := io.cpu.req_data.bits - data.io.req_data.valid := io.cpu.req_data.valid && writeback.io.req(1).ready memCmdArb.io.in(0) <> mshr.io.mem.req_cmd memCmdArb.io.in(1) <> writeback.io.mem.req_cmd dataArb.io.in(0) <> mshr.io.data - dataArb.io.in(1).valid := s2_valid && s2_hit + dataArb.io.in(1).valid := s2_valid && s2_hit && writeback.io.req(0).ready && mshr.io.cpu.ready // stall_s2 dataArb.io.in(1).bits := s2 dataArb.io.in(1).bits.way := s2_hit_way dataArb.io.in(1).bits.isWriteback := Bool(false) - stall_s2 := s2_valid && !Mux(s2_hit, dataArb.io.in(1).ready, Mux(s2.rw, writeback.io.req(1).ready, mshr.io.cpu.ready)) + stall_s2 := s2_valid && !(dataArb.io.in(1).ready && writeback.io.req(0).ready && mshr.io.cpu.ready) io.cpu.resp <> data.io.resp io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 - io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready && writeback.io.req(1).ready + io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready io.mem.req_cmd <> memCmdArb.io.out io.mem.req_data <> writeback.io.mem.req_data } From 6546dc84e2e53f5ec4e5b261217981e179f67ce6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Aug 2012 22:11:32 -0700 Subject: [PATCH 37/40] rename queue to Queue fixes build with case-insensitive file system --- uncore/llc.scala | 2 +- uncore/memserdes.scala | 2 +- uncore/slowio.scala | 4 ++-- uncore/uncore.scala | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/uncore/llc.scala b/uncore/llc.scala index 64d48ce8..a2a5d361 100644 --- a/uncore/llc.scala +++ b/uncore/llc.scala @@ -245,7 +245,7 @@ class LLCData(latency: 
Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Compo val isWriteback = Bool() override def clone = new QEntry().asInstanceOf[this.type] } - val q = (new queue(latency+2)) { new QEntry } + val q = (new Queue(latency+2)) { new QEntry } val qReady = q.io.count <= UFix(q.entries-latency-1) val valid = Reg(resetVal = Bool(false)) val req = Reg() { io.req.bits.clone } diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index e20f1ec3..cdd109c2 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -149,7 +149,7 @@ class MemDessert extends Component // test rig side io.wide.req_data.valid := state === s_data io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) - val dataq = (new queue(REFILL_CYCLES)) { new MemResp } + val dataq = (new Queue(REFILL_CYCLES)) { new MemResp } dataq.io.enq <> io.wide.resp dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) diff --git a/uncore/slowio.scala b/uncore/slowio.scala index 6cf5a3d9..068e90c5 100644 --- a/uncore/slowio.scala +++ b/uncore/slowio.scala @@ -28,12 +28,12 @@ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) val out_slow_val = Reg(resetVal = Bool(false)) val out_slow_bits = Reg() { data } - val fromhost_q = new queue(1)(data) + val fromhost_q = new Queue(1)(data) fromhost_q.io.enq.valid := in_en && (io.in_slow.valid && in_slow_rdy || reset) fromhost_q.io.enq.bits := io.in_slow.bits fromhost_q.io.deq <> io.in_fast - val tohost_q = new queue(1)(data) + val tohost_q = new Queue(1)(data) tohost_q.io.enq <> io.out_fast tohost_q.io.deq.ready := in_en && io.out_slow.ready && out_slow_val diff --git a/uncore/uncore.scala b/uncore/uncore.scala index a1726242..7ffef155 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -336,8 +336,8 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } } - val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY - val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY + val p_rep_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY + val x_init_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY // Free finished transactions for( j <- 0 until ntiles ) { From da6ec486f1e2cfbb0eb501c29262e05d3e52014d Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 27 Sep 2012 16:46:36 -0700 Subject: [PATCH 38/40] uncore and rocket changes for new xact types --- uncore/uncore.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/uncore/uncore.scala b/uncore/uncore.scala index 7ffef155..84b74366 100644 --- a/uncore/uncore.scala +++ b/uncore/uncore.scala @@ -3,14 +3,17 @@ package rocket import Chisel._ import Constants._ +class PhysicalAddress extends Bundle { + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) +} + class MemData extends Bundle { val data = Bits(width = MEM_DATA_BITS) } -class MemReqCmd() extends Bundle +class MemReqCmd() extends PhysicalAddress { val rw = Bool() - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val tag = Bits(width = MEM_TAG_BITS) } @@ -165,7 +168,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { io.probe_req.valid := Bool(false) io.probe_req.bits.p_type := 
co.getProbeRequestType(x_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) - io.probe_req.bits.address := addr_ + io.probe_req.bits.addr := addr_ io.push_p_req := Bits(0, width = ntiles) io.pop_p_rep := Bits(0, width = ntiles) io.pop_p_rep_data := Bits(0, width = ntiles) @@ -178,7 +181,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { switch (state) { is(s_idle) { when( io.alloc_req.valid && io.can_alloc ) { - addr_ := io.alloc_req.bits.xact_init.address + addr_ := io.alloc_req.bits.xact_init.addr x_type_ := io.alloc_req.bits.xact_init.x_type init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id @@ -272,7 +275,7 @@ class CoherenceHubNull(co: ThreeStateIncoherence) extends CoherenceHub(1, co) io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id - io.mem.req_cmd.bits.addr := x_init.bits.address + io.mem.req_cmd.bits.addr := x_init.bits.addr io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep @@ -432,7 +435,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val conflicts = Vec(NGLOBAL_XACTS) { Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.address) + conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.addr) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && co.messageHasData(x_init.bits))) From cf8f20584e21560b99970b8edffefa79569dd125 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Mon, 1 Oct 2012 16:08:41 -0700 Subject: [PATCH 39/40] factoring out uncore into separate uncore repo --- uncore/llc.scala | 437 ---------------------------------- uncore/memserdes.scala | 1 + uncore/uncore.scala | 519 ----------------------------------------- 3 files changed, 1 insertion(+), 956 deletions(-) delete mode 100644 uncore/llc.scala delete mode 100644 uncore/uncore.scala diff --git a/uncore/llc.scala b/uncore/llc.scala deleted file mode 100644 index a2a5d361..00000000 --- a/uncore/llc.scala +++ /dev/null @@ -1,437 +0,0 @@ -package rocket - -import Chisel._ -import Node._ -import Constants._ - -class BigMem[T <: Data](n: Int, preLatency: Int, postLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component -{ - class Inputs extends Bundle { - val addr = UFix(INPUT, log2Up(n)) - val rw = Bool(INPUT) - val wdata = gen.asInput - val wmask = gen.asInput - override def clone = new Inputs().asInstanceOf[this.type] - } - val io = new Bundle { - val in = new PipeIO()(new Inputs).flip - val rdata = gen.asOutput - } - val data = gen - val colMux = if (2*data.width <= leaf.data.width && n > leaf.n) 1 << math.floor(math.log(leaf.data.width/data.width)/math.log(2)).toInt else 1 - val nWide = if (data.width > leaf.data.width) 1+(data.width-1)/leaf.data.width else 1 - val nDeep = if (n > colMux*leaf.n) 1+(n-1)/(colMux*leaf.n) else 1 - if (nDeep > 1 || colMux > 1) - require(isPow2(n) && isPow2(leaf.n)) - - val rdataDeep = Vec(nDeep) { Bits() } - val rdataSel = Vec(nDeep) { Bool() } - for (i <- 0 until nDeep) { - val in = Pipe(io.in.valid && (if (nDeep == 1) Bool(true) else UFix(i) === io.in.bits.addr(log2Up(n)-1, log2Up(n/nDeep))), io.in.bits, preLatency) - val idx = 
in.bits.addr(log2Up(n/nDeep/colMux)-1, 0) - val wdata = in.bits.wdata.toBits - val wmask = in.bits.wmask.toBits - val ren = in.valid && !in.bits.rw - val reg_ren = Reg(ren) - val rdata = Vec(nWide) { Bits() } - - val r = Pipe(ren, in.bits.addr, postLatency) - - for (j <- 0 until nWide) { - val mem = leaf.clone - var dout: Bits = null - val dout1 = if (postLatency > 0) Reg() { Bits() } else null - - var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) - if (colMux > 1) - wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(in.bits.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) - val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) - when (in.valid) { - when (in.bits.rw) { mem.write(idx, wdata0, wmask0) } - .otherwise { if (postLatency > 0) dout1 := mem(idx) } - } - - if (postLatency == 0) { - dout = mem(idx) - } else if (postLatency == 1) { - dout = dout1 - } else - dout = Pipe(reg_ren, dout1, postLatency-1).bits - - rdata(j) := dout - } - val rdataWide = rdata.reduceLeft((x, y) => Cat(y, x)) - - var colMuxOut = rdataWide - if (colMux > 1) { - val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.width*(k+1)-1, gen.width*k))) { Bits() } - colMuxOut = colMuxIn(r.bits(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) - } - - rdataDeep(i) := colMuxOut - rdataSel(i) := r.valid - } - - io.rdata := Mux1H(rdataSel, rdataDeep) -} - -class LLCDataReq(ways: Int) extends MemReqCmd -{ - val way = UFix(width = log2Up(ways)) - val isWriteback = Bool() - - override def clone = new LLCDataReq(ways).asInstanceOf[this.type] -} - -class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component -{ - val io = new Bundle { - val cpu = (new FIFOIO) { new MemReqCmd }.flip - val repl_way = UFix(INPUT, log2Up(ways)) - val repl_dirty = Bool(INPUT) - val repl_tag = UFix(INPUT, PADDR_BITS - OFFSET_BITS - log2Up(sets)) - val data = (new FIFOIO) { new LLCDataReq(ways) } - val tag = (new FIFOIO) { new Bundle { - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) - val way = UFix(width = log2Up(ways)) - } } - val mem = new ioMemPipe - val mem_resp_set = UFix(OUTPUT, log2Up(sets)) - val mem_resp_way = UFix(OUTPUT, log2Up(ways)) - } - - class MSHR extends Bundle { - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) - val way = UFix(width = log2Up(ways)) - val tag = io.cpu.bits.tag.clone - val refilled = Bool() - val refillCount = UFix(width = log2Up(REFILL_CYCLES)) - val requested = Bool() - val old_dirty = Bool() - val old_tag = UFix(width = PADDR_BITS - OFFSET_BITS - log2Up(sets)) - - override def clone = new MSHR().asInstanceOf[this.type] - } - - val valid = Vec(outstanding) { Reg(resetVal = Bool(false)) } - val validBits = valid.toBits - val freeId = PriorityEncoder(~validBits) - val mshr = Vec(outstanding) { Reg() { new MSHR } } - when (io.cpu.valid && io.cpu.ready) { - valid(freeId) := Bool(true) - mshr(freeId).addr := io.cpu.bits.addr - mshr(freeId).tag := io.cpu.bits.tag - mshr(freeId).way := io.repl_way - mshr(freeId).old_dirty := io.repl_dirty - mshr(freeId).old_tag := io.repl_tag - mshr(freeId).requested := Bool(false) - mshr(freeId).refillCount := UFix(0) - mshr(freeId).refilled := Bool(false) - } - - val requests = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && !mshr(i).old_dirty && !mshr(i).requested):_*) - val request = requests.orR - val requestId = PriorityEncoder(requests) - when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { mshr(requestId).requested := 
Bool(true) } - - val refillId = io.mem.resp.bits.tag(log2Up(outstanding)-1, 0) - val refillCount = mshr(refillId).refillCount - when (io.mem.resp.valid) { - mshr(refillId).refillCount := refillCount + UFix(1) - when (refillCount === UFix(REFILL_CYCLES-1)) { mshr(refillId).refilled := Bool(true) } - } - - val replays = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).refilled):_*) - val replay = replays.orR - val replayId = PriorityEncoder(replays) - when (replay && io.data.ready && io.tag.ready) { valid(replayId) := Bool(false) } - - val writebacks = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).old_dirty):_*) - val writeback = writebacks.orR - val writebackId = PriorityEncoder(writebacks) - when (writeback && io.data.ready && !replay) { mshr(writebackId).old_dirty := Bool(false) } - - val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) - io.cpu.ready := !conflicts.orR && !validBits.andR - - io.data.valid := writeback - io.data.bits.rw := Bool(false) - io.data.bits.tag := mshr(replayId).tag - io.data.bits.isWriteback := Bool(true) - io.data.bits.addr := Cat(mshr(writebackId).old_tag, mshr(writebackId).addr(log2Up(sets)-1, 0)).toUFix - io.data.bits.way := mshr(writebackId).way - when (replay) { - io.data.valid := io.tag.ready - io.data.bits.isWriteback := Bool(false) - io.data.bits.addr := mshr(replayId).addr - io.data.bits.way := mshr(replayId).way - } - io.tag.valid := replay && io.data.ready - io.tag.bits.addr := io.data.bits.addr - io.tag.bits.way := io.data.bits.way - - io.mem.req_cmd.valid := request - io.mem.req_cmd.bits.rw := Bool(false) - io.mem.req_cmd.bits.addr := mshr(requestId).addr - io.mem.req_cmd.bits.tag := requestId - io.mem_resp_set := mshr(refillId).addr - io.mem_resp_way := mshr(refillId).way -} - -class LLCWriteback(requestors: Int) extends Component -{ - val io = new Bundle { - val req = Vec(requestors) { (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) }.flip } - val data = Vec(requestors) { (new FIFOIO) { new MemData }.flip } - val mem = new ioMemPipe - } - - val valid = Reg(resetVal = Bool(false)) - val who = Reg() { UFix() } - val addr = Reg() { UFix() } - val cmd_sent = Reg() { Bool() } - val data_sent = Reg() { Bool() } - val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - - var anyReq = Bool(false) - for (i <- 0 until requestors) { - io.req(i).ready := !valid && !anyReq - io.data(i).ready := valid && who === UFix(i) && io.mem.req_data.ready - anyReq = anyReq || io.req(i).valid - } - - val nextWho = PriorityEncoder(io.req.map(_.valid)) - when (!valid && io.req.map(_.valid).reduceLeft(_||_)) { - valid := Bool(true) - cmd_sent := Bool(false) - data_sent := Bool(false) - who := nextWho - addr := io.req(nextWho).bits - } - - when (io.mem.req_data.valid && io.mem.req_data.ready) { - count := count + UFix(1) - when (count === UFix(REFILL_CYCLES-1)) { - data_sent := Bool(true) - when (cmd_sent) { valid := Bool(false) } - } - } - when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { cmd_sent := Bool(true) } - when (valid && cmd_sent && data_sent) { valid := Bool(false) } - - io.mem.req_cmd.valid := valid && !cmd_sent - io.mem.req_cmd.bits.addr := addr - io.mem.req_cmd.bits.rw := Bool(true) - - io.mem.req_data.valid := valid && !data_sent && io.data(who).valid - io.mem.req_data.bits := io.data(who).bits -} - -class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Component -{ - val io = new Bundle { 
- val req = (new FIFOIO) { new LLCDataReq(ways) }.flip - val req_data = (new FIFOIO) { new MemData }.flip - val writeback = (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) } - val writeback_data = (new FIFOIO) { new MemData } - val resp = (new FIFOIO) { new MemResp } - val mem_resp = (new PipeIO) { new MemResp }.flip - val mem_resp_set = UFix(INPUT, log2Up(sets)) - val mem_resp_way = UFix(INPUT, log2Up(ways)) - } - - val data = new BigMem(sets*ways*REFILL_CYCLES, 1, latency-1, leaf)(Bits(width = MEM_DATA_BITS)) - class QEntry extends MemResp { - val isWriteback = Bool() - override def clone = new QEntry().asInstanceOf[this.type] - } - val q = (new Queue(latency+2)) { new QEntry } - val qReady = q.io.count <= UFix(q.entries-latency-1) - val valid = Reg(resetVal = Bool(false)) - val req = Reg() { io.req.bits.clone } - val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val refillCount = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - - when (data.io.in.valid && !io.mem_resp.valid) { - count := count + UFix(1) - when (valid && count === UFix(REFILL_CYCLES-1)) { valid := Bool(false) } - } - when (io.req.valid && io.req.ready) { valid := Bool(true); req := io.req.bits } - when (io.mem_resp.valid) { refillCount := refillCount + UFix(1) } - - data.io.in.valid := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) - data.io.in.bits.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix - data.io.in.bits.rw := io.req.bits.rw - data.io.in.bits.wdata := io.req_data.bits.data - data.io.in.bits.wmask := Fix(-1, io.req_data.bits.data.width) - when (valid) { - data.io.in.valid := Mux(req.rw, io.req_data.valid, qReady) - data.io.in.bits.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix - data.io.in.bits.rw := req.rw - } - when (io.mem_resp.valid) { - data.io.in.valid := Bool(true) - data.io.in.bits.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix - data.io.in.bits.rw := Bool(true) - data.io.in.bits.wdata := io.mem_resp.bits.data - } - - val tagPipe = Pipe(data.io.in.valid && !data.io.in.bits.rw, Mux(valid, req.tag, io.req.bits.tag), latency) - q.io.enq.valid := tagPipe.valid - q.io.enq.bits.tag := tagPipe.bits - q.io.enq.bits.isWriteback := Pipe(Mux(valid, req.isWriteback, io.req.bits.isWriteback), Bool(false), latency).valid - q.io.enq.bits.data := data.io.rdata - - io.req.ready := !valid && Mux(io.req.bits.isWriteback, io.writeback.ready, Bool(true)) - io.req_data.ready := !io.mem_resp.valid && Mux(valid, req.rw, io.req.valid && io.req.bits.rw) - - io.writeback.valid := io.req.valid && io.req.ready && io.req.bits.isWriteback - io.writeback.bits := io.req.bits.addr - - q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, io.resp.ready) - io.resp.valid := q.io.deq.valid && !q.io.deq.bits.isWriteback - io.resp.bits := q.io.deq.bits - io.writeback_data.valid := q.io.deq.valid && q.io.deq.bits.isWriteback - io.writeback_data.bits := q.io.deq.bits -} - -class MemReqArb(n: Int) extends Component // UNTESTED -{ - val io = new Bundle { - val cpu = Vec(n) { new ioMem().flip } - val mem = new ioMem - } - - val lock = Reg(resetVal = Bool(false)) - val locker = Reg() { UFix() } - - val arb = new RRArbiter(n)(new MemReqCmd) - val respWho = io.mem.resp.bits.tag(log2Up(n)-1,0) - val respTag = io.mem.resp.bits.tag >> UFix(log2Up(n)) - for (i <- 0 until n) { - val me = UFix(i, log2Up(n)) - arb.io.in(i).valid := io.cpu(i).req_cmd.valid - arb.io.in(i).bits := io.cpu(i).req_cmd.bits - arb.io.in(i).bits.tag 
:= Cat(io.cpu(i).req_cmd.bits.tag, me) - io.cpu(i).req_cmd.ready := arb.io.in(i).ready - io.cpu(i).req_data.ready := Bool(false) - - val getLock = io.cpu(i).req_cmd.fire() && io.cpu(i).req_cmd.bits.rw && !lock - val haveLock = lock && locker === me - when (getLock) { - lock := Bool(true) - locker := UFix(i) - } - when (getLock || haveLock) { - io.cpu(i).req_data.ready := io.mem.req_data.ready - io.mem.req_data.valid := Bool(true) - io.mem.req_data.bits := io.cpu(i).req_data.bits - } - - io.cpu(i).resp.valid := io.mem.resp.valid && respWho === me - io.cpu(i).resp.bits := io.mem.resp.bits - io.cpu(i).resp.bits.tag := respTag - } - io.mem.resp.ready := io.cpu(respWho).resp.ready - - val unlock = Counter(io.mem.req_data.fire(), REFILL_CYCLES)._2 - when (unlock) { lock := Bool(false) } -} - -class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component -{ - val io = new Bundle { - val cpu = new ioMem().flip - val mem = new ioMemPipe - } - - val tagWidth = PADDR_BITS - OFFSET_BITS - log2Up(sets) - val metaWidth = tagWidth + 2 // valid + dirty - - val memCmdArb = (new Arbiter(2)) { new MemReqCmd } - val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } - val mshr = new LLCMSHRFile(sets, ways, outstanding) - val tags = new BigMem(sets, 0, 1, tagLeaf)(Bits(width = metaWidth*ways)) - val data = new LLCData(4, sets, ways, dataLeaf) - val writeback = new LLCWriteback(2) - - val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) - val initialize = !initCount(log2Up(sets)) - when (initialize) { initCount := initCount + UFix(1) } - - val stall_s1 = Bool() - val replay_s1 = Reg(resetVal = Bool(false)) - val s1_valid = Reg(io.cpu.req_cmd.valid && !stall_s1 || replay_s1, resetVal = Bool(false)) - replay_s1 := s1_valid && stall_s1 - val s1 = Reg() { new MemReqCmd } - when (io.cpu.req_cmd.valid && io.cpu.req_cmd.ready) { s1 := io.cpu.req_cmd.bits } - - val stall_s2 = Bool() - val s2_valid = Reg(resetVal = Bool(false)) - s2_valid := s1_valid && !replay_s1 && !stall_s1 || stall_s2 - val s2 = Reg() { new MemReqCmd } - val s2_tags = Vec(ways) { Reg() { Bits(width = metaWidth) } } - when (s1_valid && !stall_s1 && !replay_s1) { - s2 := s1 - for (i <- 0 until ways) - s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) - } - val s2_hits = s2_tags.map(t => t(tagWidth) && s2.addr(s2.addr.width-1, s2.addr.width-tagWidth) === t(tagWidth-1, 0)) - val s2_hit_way = OHToUFix(s2_hits) - val s2_hit = s2_hits.reduceLeft(_||_) - val s2_hit_dirty = s2_tags(s2_hit_way)(tagWidth+1) - val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) - val repl_tag = s2_tags(repl_way).toUFix - val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty - stall_s1 := initialize || stall_s2 - - val tag_we = setDirty || mshr.io.tag.valid - val tag_waddr = Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(log2Up(sets)-1,0) - val tag_wdata = Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)) - val tag_wway = Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way) - tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || tag_we - tags.io.in.bits.addr := Mux(initialize, initCount, Mux(tag_we, tag_waddr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)(log2Up(sets)-1,0))) - tags.io.in.bits.rw := initialize || tag_we - tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, tag_wdata)) - tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), 
UFixToOH(tag_wway))) - when (tag_we && Mux(stall_s2, s2.addr, s1.addr)(log2Up(sets)-1,0) === tag_waddr) { s2_tags(tag_wway) := tag_wdata } - - mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw && dataArb.io.in(1).ready && writeback.io.req(0).ready // stall_s2 - mshr.io.cpu.bits := s2 - mshr.io.repl_way := repl_way - mshr.io.repl_dirty := repl_tag(tagWidth+1, tagWidth).andR - mshr.io.repl_tag := repl_tag - mshr.io.mem.resp := io.mem.resp - mshr.io.tag.ready := !setDirty - - data.io.req <> dataArb.io.out - data.io.mem_resp := io.mem.resp - data.io.mem_resp_set := mshr.io.mem_resp_set - data.io.mem_resp_way := mshr.io.mem_resp_way - data.io.req_data.bits := io.cpu.req_data.bits - data.io.req_data.valid := io.cpu.req_data.valid - - writeback.io.req(0) <> data.io.writeback - writeback.io.data(0) <> data.io.writeback_data - writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw && dataArb.io.in(1).ready && mshr.io.cpu.ready // stall_s2 - writeback.io.req(1).bits := s2.addr - writeback.io.data(1).valid := io.cpu.req_data.valid - writeback.io.data(1).bits := io.cpu.req_data.bits - - memCmdArb.io.in(0) <> mshr.io.mem.req_cmd - memCmdArb.io.in(1) <> writeback.io.mem.req_cmd - - dataArb.io.in(0) <> mshr.io.data - dataArb.io.in(1).valid := s2_valid && s2_hit && writeback.io.req(0).ready && mshr.io.cpu.ready // stall_s2 - dataArb.io.in(1).bits := s2 - dataArb.io.in(1).bits.way := s2_hit_way - dataArb.io.in(1).bits.isWriteback := Bool(false) - - stall_s2 := s2_valid && !(dataArb.io.in(1).ready && writeback.io.req(0).ready && mshr.io.cpu.ready) - - io.cpu.resp <> data.io.resp - io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 - io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready - io.mem.req_cmd <> memCmdArb.io.out - io.mem.req_data <> writeback.io.mem.req_data -} diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala index cdd109c2..712dec16 100644 --- a/uncore/memserdes.scala +++ b/uncore/memserdes.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import scala.math._ +import uncore._ class ioMemSerialized extends Bundle { diff --git a/uncore/uncore.scala b/uncore/uncore.scala deleted file mode 100644 index 84b74366..00000000 --- a/uncore/uncore.scala +++ /dev/null @@ -1,519 +0,0 @@ -package rocket - -import Chisel._ -import Constants._ - -class PhysicalAddress extends Bundle { - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) -} - -class MemData extends Bundle { - val data = Bits(width = MEM_DATA_BITS) -} - -class MemReqCmd() extends PhysicalAddress -{ - val rw = Bool() - val tag = Bits(width = MEM_TAG_BITS) -} - -class MemResp () extends MemData -{ - val tag = Bits(width = MEM_TAG_BITS) -} - -class ioMem() extends Bundle -{ - val req_cmd = (new FIFOIO) { new MemReqCmd() } - val req_data = (new FIFOIO) { new MemData() } - val resp = (new FIFOIO) { new MemResp() }.flip -} - -class ioMemPipe() extends Bundle -{ - val req_cmd = (new FIFOIO) { new MemReqCmd() } - val req_data = (new FIFOIO) { new MemData() } - val resp = (new PipeIO) { new MemResp() }.flip -} - -class TrackerProbeData extends Bundle { - val tile_id = Bits(width = TILE_ID_BITS) -} - -class TrackerAllocReq extends Bundle { - val xact_init = new TransactionInit() - val tile_id = Bits(width = TILE_ID_BITS) -} - -class TrackerDependency extends Bundle { - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) -} - -class ioTileLink extends Bundle { - val xact_init = (new FIFOIO) { new TransactionInit } - val xact_init_data = (new FIFOIO) { new TransactionInitData } - val xact_abort = 
(new FIFOIO) { new TransactionAbort }.flip - val probe_req = (new FIFOIO) { new ProbeRequest }.flip - val probe_rep = (new FIFOIO) { new ProbeReply } - val probe_rep_data = (new FIFOIO) { new ProbeReplyData } - val xact_rep = (new FIFOIO) { new TransactionReply }.flip - val xact_finish = (new FIFOIO) { new TransactionFinish } - val incoherent = Bool(OUTPUT) -} - -class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { - val io = new Bundle { - val alloc_req = (new FIFOIO) { new TrackerAllocReq }.flip - val p_data = (new PipeIO) { new TrackerProbeData }.flip - val can_alloc = Bool(INPUT) - val xact_finish = Bool(INPUT) - val p_rep_cnt_dec = Bits(INPUT, ntiles) - val p_req_cnt_inc = Bits(INPUT, ntiles) - val tile_incoherent = Bits(INPUT, ntiles) - val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip - val x_init_data = (new PipeIO) { new TransactionInitData }.flip - val sent_x_rep_ack = Bool(INPUT) - val p_rep_data_dep = (new PipeIO) { new TrackerDependency }.flip - val x_init_data_dep = (new PipeIO) { new TrackerDependency }.flip - - val mem_req_cmd = (new FIFOIO) { new MemReqCmd } - val mem_req_data = (new FIFOIO) { new MemData } - val mem_req_lock = Bool(OUTPUT) - val probe_req = (new FIFOIO) { new ProbeRequest } - val busy = Bool(OUTPUT) - val addr = Bits(OUTPUT, PADDR_BITS - OFFSET_BITS) - val init_tile_id = Bits(OUTPUT, TILE_ID_BITS) - val p_rep_tile_id = Bits(OUTPUT, TILE_ID_BITS) - val tile_xact_id = Bits(OUTPUT, TILE_XACT_ID_BITS) - val sharer_count = Bits(OUTPUT, TILE_ID_BITS+1) - val x_type = Bits(OUTPUT, X_INIT_TYPE_MAX_BITS) - val push_p_req = Bits(OUTPUT, ntiles) - val pop_p_rep = Bits(OUTPUT, ntiles) - val pop_p_rep_data = Bits(OUTPUT, ntiles) - val pop_p_rep_dep = Bits(OUTPUT, ntiles) - val pop_x_init = Bits(OUTPUT, ntiles) - val pop_x_init_data = Bits(OUTPUT, ntiles) - val pop_x_init_dep = Bits(OUTPUT, ntiles) - val send_x_rep_ack = Bool(OUTPUT) - } - - def doMemReqWrite(req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: PipeIO[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.bits.rw := Bool(true) - req_data.bits := data.bits - when(req_cmd.ready && req_cmd.valid) { - cmd_sent := Bool(true) - } - when (at_front_of_dep_queue) { - req_cmd.valid := !cmd_sent && req_data.ready && data.valid - lock := data.valid || cmd_sent - when (req_cmd.ready || cmd_sent) { - req_data.valid := data.valid - when(req_data.ready) { - pop_data := UFix(1) << tile_id - when (data.valid) { - mem_cnt := mem_cnt_next - when(mem_cnt === UFix(REFILL_CYCLES-1)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) - } - } - } - } - } - } - - def doMemReqRead(req_cmd: FIFOIO[MemReqCmd], trigger: Bool) { - req_cmd.valid := Bool(true) - req_cmd.bits.rw := Bool(false) - when(req_cmd.ready) { - trigger := Bool(false) - } - } - - val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } - val state = Reg(resetVal = s_idle) - val addr_ = Reg{ UFix() } - val x_type_ = Reg{ Bits() } - val init_tile_id_ = Reg{ Bits() } - val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2Up(ntiles))) - val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) - val p_rep_tile_id_ = Reg{ Bits() } - val x_needs_read = Reg(resetVal = Bool(false)) - val x_init_data_needs_write = Reg(resetVal = Bool(false)) - val p_rep_data_needs_write = Reg(resetVal = Bool(false)) - val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) 
-  val p_w_mem_cmd_sent = Reg(resetVal = Bool(false))
-  val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES)))
-  val mem_cnt_next = mem_cnt + UFix(1)
-  val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES))
-  val p_req_initial_flags = Bits(width = ntiles)
-  p_req_initial_flags := (if (ntiles == 1) Bits(0) else ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0)))) //TODO: Broadcast only
-
-  io.busy := state != s_idle
-  io.addr := addr_
-  io.init_tile_id := init_tile_id_
-  io.p_rep_tile_id := p_rep_tile_id_
-  io.tile_xact_id := tile_xact_id_
-  io.sharer_count := UFix(ntiles) // TODO: Broadcast only
-  io.x_type := x_type_
-
-  io.mem_req_cmd.valid := Bool(false)
-  io.mem_req_cmd.bits.rw := Bool(false)
-  io.mem_req_cmd.bits.addr := addr_
-  io.mem_req_cmd.bits.tag := UFix(id)
-  io.mem_req_data.valid := Bool(false)
-  io.mem_req_data.bits.data := UFix(0)
-  io.mem_req_lock := Bool(false)
-  io.probe_req.valid := Bool(false)
-  io.probe_req.bits.p_type := co.getProbeRequestType(x_type_, UFix(0))
-  io.probe_req.bits.global_xact_id := UFix(id)
-  io.probe_req.bits.addr := addr_
-  io.push_p_req := Bits(0, width = ntiles)
-  io.pop_p_rep := Bits(0, width = ntiles)
-  io.pop_p_rep_data := Bits(0, width = ntiles)
-  io.pop_p_rep_dep := Bits(0, width = ntiles)
-  io.pop_x_init := Bits(0, width = ntiles)
-  io.pop_x_init_data := Bits(0, width = ntiles)
-  io.pop_x_init_dep := Bits(0, width = ntiles)
-  io.send_x_rep_ack := Bool(false)
-
-  switch (state) {
-    is(s_idle) {
-      when( io.alloc_req.valid && io.can_alloc ) {
-        addr_ := io.alloc_req.bits.xact_init.addr
-        x_type_ := io.alloc_req.bits.xact_init.x_type
-        init_tile_id_ := io.alloc_req.bits.tile_id
-        tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id
-        x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init)
-        x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0))
-        p_req_flags := p_req_initial_flags
-        mem_cnt := UFix(0)
-        p_w_mem_cmd_sent := Bool(false)
-        x_w_mem_cmd_sent := Bool(false)
-        io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id
-        if(ntiles > 1) {
-          p_rep_count := PopCount(p_req_initial_flags)
-          state := Mux(p_req_initial_flags.orR, s_probe, s_mem)
-        } else state := s_mem
-      }
-    }
-    is(s_probe) {
-      when(p_req_flags.orR) {
-        io.push_p_req := p_req_flags
-        io.probe_req.valid := Bool(true)
-      }
-      when(io.p_req_cnt_inc.orR) {
-        p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs
-      }
-      when(io.p_rep_cnt_dec.orR) {
-        val dec = PopCount(io.p_rep_cnt_dec)
-        io.pop_p_rep := io.p_rep_cnt_dec
-        if(ntiles > 1) p_rep_count := p_rep_count - dec
-        when(p_rep_count === dec) {
-          state := s_mem
-        }
-      }
-      when(io.p_data.valid) {
-        p_rep_data_needs_write := Bool(true)
-        p_rep_tile_id_ := io.p_data.bits.tile_id
-      }
-    }
-    is(s_mem) {
-      when (p_rep_data_needs_write) {
-        doMemReqWrite(io.mem_req_cmd,
-                      io.mem_req_data,
-                      io.mem_req_lock,
-                      io.p_rep_data,
-                      p_rep_data_needs_write,
-                      p_w_mem_cmd_sent,
-                      io.pop_p_rep_data,
-                      io.pop_p_rep_dep,
-                      io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)),
-                      p_rep_tile_id_)
-      } . elsewhen(x_init_data_needs_write) {
-        doMemReqWrite(io.mem_req_cmd,
-                      io.mem_req_data,
-                      io.mem_req_lock,
-                      io.x_init_data,
-                      x_init_data_needs_write,
-                      x_w_mem_cmd_sent,
-                      io.pop_x_init_data,
-                      io.pop_x_init_dep,
-                      io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)),
-                      init_tile_id_)
-      } . elsewhen (x_needs_read) {
-        doMemReqRead(io.mem_req_cmd, x_needs_read)
-      } . otherwise {
-        state := Mux(co.needsAckReply(x_type_, UFix(0)), s_ack, s_busy)
-      }
-    }
-    is(s_ack) {
-      io.send_x_rep_ack := Bool(true)
-      when(io.sent_x_rep_ack) { state := s_busy }
-    }
-    is(s_busy) { // Nothing left to do but wait for transaction to complete
-      when (io.xact_finish) {
-        state := s_idle
-      }
-    }
-  }
-}
-
-abstract class CoherenceHub(ntiles: Int, co: CoherencePolicy) extends Component {
-  val io = new Bundle {
-    val tiles = Vec(ntiles) { new ioTileLink() }.flip
-    val mem = new ioMem
-  }
-}
-
-class CoherenceHubNull(co: ThreeStateIncoherence) extends CoherenceHub(1, co)
-{
-  val x_init = io.tiles(0).xact_init
-  val is_write = x_init.bits.x_type === co.xactInitWriteback
-  x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp
-  io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid)
-  io.mem.req_cmd.bits.rw := is_write
-  io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id
-  io.mem.req_cmd.bits.addr := x_init.bits.addr
-  io.mem.req_data <> io.tiles(0).xact_init_data
-
-  val x_rep = io.tiles(0).xact_rep
-  x_rep.bits.x_type := Mux(io.mem.resp.valid, co.xactReplyData, co.xactReplyAck)
-  x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id)
-  x_rep.bits.global_xact_id := UFix(0) // don't care
-  x_rep.bits.data := io.mem.resp.bits.data
-  x_rep.bits.require_ack := Bool(true)
-  x_rep.valid := io.mem.resp.valid || x_init.valid && is_write && io.mem.req_cmd.ready
-
-  io.tiles(0).xact_abort.valid := Bool(false)
-  io.tiles(0).xact_finish.ready := Bool(true)
-  io.tiles(0).probe_req.valid := Bool(false)
-  io.tiles(0).probe_rep.ready := Bool(true)
-  io.tiles(0).probe_rep_data.ready := Bool(true)
-}
-
-
-class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceHub(ntiles, co)
-{
-  val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _, co))
-
-  val busy_arr = Vec(NGLOBAL_XACTS){ Bool() }
-  val addr_arr = Vec(NGLOBAL_XACTS){ Bits(width=PADDR_BITS-OFFSET_BITS) }
-  val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) }
-  val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_XACT_ID_BITS) }
-  val x_type_arr = Vec(NGLOBAL_XACTS){ Bits(width=X_INIT_TYPE_MAX_BITS) }
-  val sh_count_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) }
-  val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() }
-
-  val do_free_arr = Vec(NGLOBAL_XACTS){ Bool() }
-  val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} }
-  val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} }
-  val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() }
-  val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width = TILE_ID_BITS) }
-  val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Bool() }
-
-  for( i <- 0 until NGLOBAL_XACTS) {
-    val t = trackerList(i).io
-    busy_arr(i) := t.busy
-    addr_arr(i) := t.addr
-    init_tile_id_arr(i) := t.init_tile_id
-    tile_xact_id_arr(i) := t.tile_xact_id
-    x_type_arr(i) := t.x_type
-    sh_count_arr(i) := t.sharer_count
-    send_x_rep_ack_arr(i) := t.send_x_rep_ack
-    t.xact_finish := do_free_arr(i)
-    t.p_data.bits.tile_id := p_data_tile_id_arr(i)
-    t.p_data.valid := p_data_valid_arr(i)
-    t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits
-    t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits
-    t.tile_incoherent := (Vec(io.tiles.map(_.incoherent)) { Bool() }).toBits
-    t.sent_x_rep_ack := sent_x_rep_ack_arr(i)
-    do_free_arr(i) := Bool(false)
-    sent_x_rep_ack_arr(i) := Bool(false)
-    p_data_tile_id_arr(i) := Bits(0, width = TILE_ID_BITS)
-    p_data_valid_arr(i) := Bool(false)
-    for( j <- 0 until ntiles) {
-      p_rep_cnt_dec_arr(i)(j) := Bool(false)
-      p_req_cnt_inc_arr(i)(j) := Bool(false)
-    }
-  }
-
-  val p_rep_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY
-  val x_init_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY
-
-  // Free finished transactions
-  for( j <- 0 until ntiles ) {
-    val finish = io.tiles(j).xact_finish
-    when (finish.valid) {
-      do_free_arr(finish.bits.global_xact_id) := Bool(true)
-    }
-    finish.ready := Bool(true)
-  }
-
-  // Reply to initial requestor
-  // Forward memory responses from mem to tile or arbitrate to ack
-  val mem_idx = io.mem.resp.bits.tag
-  val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits)
-  for( j <- 0 until ntiles ) {
-    val rep = io.tiles(j).xact_rep
-    rep.bits.x_type := UFix(0)
-    rep.bits.tile_xact_id := UFix(0)
-    rep.bits.global_xact_id := UFix(0)
-    rep.bits.data := io.mem.resp.bits.data
-    rep.bits.require_ack := Bool(true)
-    rep.valid := Bool(false)
-    when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) {
-      rep.bits.x_type := co.getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx))
-      rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx)
-      rep.bits.global_xact_id := mem_idx
-      rep.valid := Bool(true)
-    } . otherwise {
-      rep.bits.x_type := co.getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx))
-      rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx)
-      rep.bits.global_xact_id := ack_idx
-      when (UFix(j) === init_tile_id_arr(ack_idx)) {
-        rep.valid := send_x_rep_ack_arr.toBits.orR
-        sent_x_rep_ack_arr(ack_idx) := rep.ready
-      }
-    }
-  }
-  io.mem.resp.ready := io.tiles(init_tile_id_arr(mem_idx)).xact_rep.ready
-
-  // Create an arbiter for the one memory port
-  // We have to arbitrate between the different trackers' memory requests
-  // and once we have picked a request, get the right write data
-  val mem_req_cmd_arb = (new Arbiter(NGLOBAL_XACTS)) { new MemReqCmd() }
-  val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() }
-  for( i <- 0 until NGLOBAL_XACTS ) {
-    mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd
-    mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data
-    mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock
-  }
-  io.mem.req_cmd <> Queue(mem_req_cmd_arb.io.out)
-  io.mem.req_data <> Queue(mem_req_data_arb.io.out)
-
-  // Handle probe replies, which may or may not have data
-  for( j <- 0 until ntiles ) {
-    val p_rep = io.tiles(j).probe_rep
-    val p_rep_data = io.tiles(j).probe_rep_data
-    val idx = p_rep.bits.global_xact_id
-    val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool)
-    val do_pop = foldR(pop_p_reps)(_ || _)
-    p_rep.ready := Bool(true)
-    p_rep_data_dep_list(j).io.enq.valid := p_rep.valid && co.messageHasData(p_rep.bits)
-    p_rep_data_dep_list(j).io.enq.bits.global_xact_id := p_rep.bits.global_xact_id
-    p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _)
-    when (p_rep.valid && co.messageHasData(p_rep.bits)) {
-      p_data_valid_arr(idx) := Bool(true)
-      p_data_tile_id_arr(idx) := UFix(j)
-    }
-    p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_)
-  }
-  for( i <- 0 until NGLOBAL_XACTS ) {
-    trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid
-    trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits
-
-    trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid))
-    trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits))
-
-    for( j <- 0 until ntiles) {
-      val p_rep = io.tiles(j).probe_rep
-      p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i))
-    }
-  }
-
-  // Nack conflicting transaction init attempts
-  val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() }
-  val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) }
-  val want_to_abort_arr = Vec(ntiles) { Bool() }
-  for( j <- 0 until ntiles ) {
-    val x_init = io.tiles(j).xact_init
-    val x_init_data = io.tiles(j).xact_init_data
-    val x_abort = io.tiles(j).xact_abort
-    val abort_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES)))
-    val conflicts = Vec(NGLOBAL_XACTS) { Bool() }
-    for( i <- 0 until NGLOBAL_XACTS) {
-      val t = trackerList(i).io
-      conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.addr)
-    }
-    x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id
-    want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && co.messageHasData(x_init.bits)))
-
-    x_abort.valid := Bool(false)
-    switch(abort_state_arr(j)) {
-      is(s_idle) {
-        when(want_to_abort_arr(j)) {
-          when(co.messageHasData(x_init.bits)) {
-            abort_state_arr(j) := s_abort_drain
-          } . otherwise {
-            abort_state_arr(j) := s_abort_send
-          }
-        }
-      }
-      is(s_abort_drain) { // raises x_init_data.ready below
-        when(x_init_data.valid) {
-          abort_cnt := abort_cnt + UFix(1)
-          when(abort_cnt === ~UFix(0, width = log2Up(REFILL_CYCLES))) {
-            abort_state_arr(j) := s_abort_send
-          }
-        }
-      }
-      is(s_abort_send) { // nothing is dequeued for now
-        x_abort.valid := Bool(true)
-        when(x_abort.ready) { // raises x_init.ready below
-          abort_state_arr(j) := s_idle
-        }
-      }
-    }
-  }
-
-  // Handle transaction initiation requests
-  // Only one allocation per cycle
-  // Init requests may or may not have data
-  val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() }
-  val init_arb = (new Arbiter(ntiles)) { new TrackerAllocReq() }
-  for( i <- 0 until NGLOBAL_XACTS ) {
-    alloc_arb.io.in(i).valid := !trackerList(i).io.busy
-    trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready
-    trackerList(i).io.alloc_req.bits := init_arb.io.out.bits
-    trackerList(i).io.alloc_req.valid := init_arb.io.out.valid
-
-    trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits
-    trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid
-    trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits))
-    trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid))
-  }
-  for( j <- 0 until ntiles ) {
-    val x_init = io.tiles(j).xact_init
-    val x_init_data = io.tiles(j).xact_init_data
-    val x_init_data_dep = x_init_data_dep_list(j).io.deq
-    val x_abort = io.tiles(j).xact_abort
-    init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid
-    init_arb.io.in(j).bits.xact_init := x_init.bits
-    init_arb.io.in(j).bits.tile_id := UFix(j)
-    val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool)
-    val do_pop = foldR(pop_x_inits)(_||_)
-    x_init_data_dep_list(j).io.enq.valid := do_pop && co.messageHasData(x_init.bits) && (abort_state_arr(j) === s_idle)
-    x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits)
-    x_init.ready := (x_abort.valid && x_abort.ready) || do_pop
-    x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_)
-    x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_)
-  }
-
-  alloc_arb.io.out.ready := init_arb.io.out.valid
-
-  // Handle probe request generation
-  // Must arbitrate for each request port
-  val p_req_arb_arr = List.fill(ntiles)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() })
-  for( j <- 0 until ntiles ) {
-    for( i <- 0 until NGLOBAL_XACTS ) {
-      val t = trackerList(i).io
-      p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits
-      p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j)
-      p_req_cnt_inc_arr(i)(j) := p_req_arb_arr(j).io.in(i).ready
-    }
-    p_req_arb_arr(j).io.out <> io.tiles(j).probe_req
-  }
-
-}

From 916c1019af027e7d42133676644f70c4cbb9df06 Mon Sep 17 00:00:00 2001
From: Huy Vo
Date: Tue, 9 Oct 2012 13:02:58 -0700
Subject: [PATCH 40/40] fixed memdessert unpacking

---
 uncore/memserdes.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uncore/memserdes.scala b/uncore/memserdes.scala
index 712dec16..e8d99514 100644
--- a/uncore/memserdes.scala
+++ b/uncore/memserdes.scala
@@ -144,8 +144,8 @@ class MemDessert extends Component // test rig side
   val req_cmd = in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (abits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH)
   io.wide.req_cmd.valid := state === s_cmd
   io.wide.req_cmd.bits.tag := req_cmd
-  io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width)
-  io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.addr.width)
+  io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.rw.width)
+  io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width)
   io.wide.req_data.valid := state === s_data
   io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH)
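
Note on the layout assumed by the fix above: the corrected unpacking reads rw from the bit directly above tag, and addr from everything above tag and rw, i.e. it treats the serialized request command as tag in the low bits, then rw, then addr. The removed lines instead shifted addr past tag alone and indexed rw at tag.width + addr.width. A minimal plain-Scala sketch of that packing and unpacking follows; the field widths and object name are made-up placeholders for illustration, not the real MemReqCmd parameters.

object ReqCmdLayoutSketch {
  // Hypothetical widths, chosen only to make the example concrete.
  val tagWidth = 5
  val rwWidth  = 1

  // Pack fields LSB-first in the order the dessert expects: tag, rw, addr.
  def pack(addr: BigInt, rw: BigInt, tag: BigInt): BigInt =
    (addr << (tagWidth + rwWidth)) | (rw << tagWidth) | tag

  // Unpack with the corrected offsets: rw is the bit just above tag,
  // and addr is everything above tag and rw.
  def unpack(bits: BigInt): (BigInt, BigInt, BigInt) = {
    val tag  = bits & ((BigInt(1) << tagWidth) - 1)
    val rw   = (bits >> tagWidth) & ((BigInt(1) << rwWidth) - 1)
    val addr = bits >> (tagWidth + rwWidth)
    (addr, rw, tag)
  }

  def main(args: Array[String]): Unit = {
    val packed = pack(addr = BigInt(0x80000000L), rw = BigInt(1), tag = BigInt(3))
    assert(unpack(packed) == (BigInt(0x80000000L), BigInt(1), BigInt(3)))
    println("round-trip ok")
  }
}

With the pre-patch offsets, rw would be sampled from inside the addr field and addr would still contain the rw bit in its LSB, so the round trip above would fail; swapping the two offsets, as the patch does, restores it.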