1
0

replace tile memory interface with ioTileLink

Work in progress towards a coherent HTIF. For now, requests
are passed incoherently through a null coherence hub.
This commit is contained in:
Andrew Waterman 2012-02-29 03:08:04 -08:00
parent 082b38d315
commit 012da6002e
10 changed files with 163 additions and 160 deletions

View File

@ -4,75 +4,64 @@ import Chisel._;
import Node._; import Node._;
import Constants._; import Constants._;
class ioMem() extends Bundle
{
val req_val = Bool(OUTPUT);
val req_rdy = Bool(INPUT);
val req_rw = Bool(OUTPUT);
val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT);
val req_tag = Bits(MEM_TAG_BITS, OUTPUT);
val req_data_val = Bool(OUTPUT);
val req_data_rdy = Bool(INPUT);
val req_data_bits = Bits(MEM_DATA_BITS, OUTPUT);
val resp_val = Bool(INPUT);
val resp_tag = Bits(MEM_TAG_BITS, INPUT);
val resp_data = Bits(MEM_DATA_BITS, INPUT);
}
class ioMemArbiter(n: Int) extends Bundle() {
val mem = new ioMem();
val requestor = Vec(n) { new ioMem().flip() }
}
class rocketMemArbiter(n: Int) extends Component { class rocketMemArbiter(n: Int) extends Component {
val io = new ioMemArbiter(n); val io = new Bundle {
val mem = new ioTileLink
val requestor = Vec(n) { new ioTileLink().flip }
}
var req_val = Bool(false) var req_val = Bool(false)
var req_rdy = io.mem.req_rdy var req_rdy = io.mem.xact_init.ready
for (i <- 0 until n) for (i <- 0 until n)
{ {
io.requestor(i).req_rdy := req_rdy io.requestor(i).xact_init.ready := req_rdy
req_val = req_val || io.requestor(i).req_val req_val = req_val || io.requestor(i).xact_init.valid
req_rdy = req_rdy && !io.requestor(i).req_val req_rdy = req_rdy && !io.requestor(i).xact_init.valid
} }
// if more than one requestor at a time can write back, the data
// arbiter needs to be made stateful: one xact's write data must
// be sent to the memory system contiguously.
var req_data_val = Bool(false) var req_data_val = Bool(false)
var req_data_rdy = io.mem.req_data_rdy var req_data_rdy = io.mem.xact_init_data.ready
for (i <- 0 until n) for (i <- 0 until n)
{ {
io.requestor(i).req_data_rdy := req_data_rdy io.requestor(i).xact_init_data.ready := req_data_rdy
req_data_val = req_data_val || io.requestor(i).req_data_val req_data_val = req_data_val || io.requestor(i).xact_init_data.valid
req_data_rdy = req_data_rdy && !io.requestor(i).req_data_val req_data_rdy = req_data_rdy && !io.requestor(i).xact_init_data.valid
} }
var req_rw = io.requestor(n-1).req_rw var req_bits = Wire() { new TransactionInit }
var req_addr = io.requestor(n-1).req_addr req_bits := io.requestor(n-1).xact_init.bits
var req_tag = Cat(io.requestor(n-1).req_tag, UFix(n-1, log2up(n))) req_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n)))
for (i <- n-1 to 0 by -1) for (i <- n-2 to 0 by -1)
{ {
req_rw = Mux(io.requestor(i).req_val, io.requestor(i).req_rw, req_rw) var my_req_bits = Wire() { new TransactionInit }
req_addr = Mux(io.requestor(i).req_val, io.requestor(i).req_addr, req_addr) my_req_bits := io.requestor(i).xact_init.bits
req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) my_req_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n)))
req_bits = Mux(io.requestor(i).xact_init.valid, my_req_bits, req_bits)
} }
var req_data_bits = io.requestor(n-1).req_data_bits var req_data_bits = io.requestor(n-1).xact_init_data.bits
for (i <- n-1 to 0 by -1) for (i <- n-2 to 0 by -1)
req_data_bits = Mux(io.requestor(i).req_data_val, io.requestor(i).req_data_bits, req_data_bits) req_data_bits = Mux(io.requestor(i).xact_init_data.valid, io.requestor(i).xact_init_data.bits, req_data_bits)
io.mem.req_val := req_val io.mem.xact_init.valid := req_val
io.mem.req_rw := req_rw io.mem.xact_init.bits := req_bits
io.mem.req_addr := req_addr
io.mem.req_tag := req_tag
io.mem.req_data_val := req_data_val io.mem.xact_init_data.valid := req_data_val
io.mem.req_data_bits := req_data_bits io.mem.xact_init_data.bits := req_data_bits
for (i <- 0 until n) for (i <- 0 until n)
{ {
io.requestor(i).resp_val := io.mem.resp_val && io.mem.resp_tag(log2up(n)-1,0) === UFix(i) val tag = io.mem.xact_rep.bits.tile_xact_id
io.requestor(i).resp_data := io.mem.resp_data io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2up(n)-1,0) === UFix(i)
io.requestor(i).resp_tag := io.mem.resp_tag >> UFix(log2up(n)) io.requestor(i).xact_rep.bits.data := io.mem.xact_rep.bits.data
io.requestor(i).xact_rep.bits.t_type := io.mem.xact_rep.bits.t_type
io.requestor(i).xact_rep.bits.has_data := io.mem.xact_rep.bits.has_data
io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2up(n))
io.requestor(i).xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id
} }
io.mem.xact_rep.ready := Bool(true) // XXX we shouldn't have xact_rep.ready
} }

View File

@ -10,22 +10,21 @@ class MemData extends Bundle {
class MemReqCmd() extends Bundle class MemReqCmd() extends Bundle
{ {
val rw = Bool() val rw = Bool()
val addr = UFix(PADDR_BITS - OFFSET_BITS) val addr = UFix(width = PADDR_BITS - OFFSET_BITS)
val tag = Bits(MEM_TAG_BITS) val tag = Bits(width = MEM_TAG_BITS)
} }
class MemResp () extends Bundle class MemResp () extends Bundle
{ {
val tag = Bits(MEM_TAG_BITS) val tag = Bits(width = MEM_TAG_BITS)
val data = Bits(width = MEM_DATA_BITS) val data = Bits(width = MEM_DATA_BITS)
val valid = Bool()
} }
class ioMemHub() extends Bundle class ioMem() extends Bundle
{ {
val req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip val req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip
val req_data = (new ioDecoupled) { new MemData() }.flip val req_data = (new ioDecoupled) { new MemData() }.flip
val resp = new MemResp() val resp = (new ioValid) { new MemResp() }
} }
class HubMemReq extends Bundle { class HubMemReq extends Bundle {
@ -49,7 +48,7 @@ class TransactionInit extends Bundle {
val t_type = Bits(width = TTYPE_BITS) val t_type = Bits(width = TTYPE_BITS)
val has_data = Bool() val has_data = Bool()
val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS)
val address = Bits(width = PADDR_BITS) val address = UFix(width = PADDR_BITS)
} }
class TransactionInitData extends MemData class TransactionInitData extends MemData
@ -348,8 +347,8 @@ abstract class CoherenceHub extends Component with CoherencePolicy
class CoherenceHubNull extends Component { class CoherenceHubNull extends Component {
val io = new Bundle { val io = new Bundle {
val tile = new ioTileLink() val tile = new ioTileLink().flip
val mem = new ioMemHub() val mem = new ioMem
} }
val x_init = io.tile.xact_init val x_init = io.tile.xact_init
@ -362,11 +361,11 @@ class CoherenceHubNull extends Component {
io.mem.req_data <> io.tile.xact_init_data io.mem.req_data <> io.tile.xact_init_data
val x_rep = io.tile.xact_rep val x_rep = io.tile.xact_rep
x_rep.bits.t_type := Mux(is_write, X_WRITE_UNCACHED, X_READ_EXCLUSIVE) x_rep.bits.t_type := Mux(io.mem.resp.valid, X_READ_EXCLUSIVE, X_WRITE_UNCACHED)
x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp.tag) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id)
x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.global_xact_id := UFix(0) // don't care
x_rep.bits.data := io.mem.resp.data x_rep.bits.data := io.mem.resp.bits.data
x_rep.valid := io.mem.resp.valid || is_write x_rep.valid := io.mem.resp.valid || x_init.valid && is_write
} }
@ -388,7 +387,7 @@ class CoherenceHubNoDir extends CoherenceHub {
val io = new Bundle { val io = new Bundle {
val tiles = Vec(NTILES) { new ioTileLink() } val tiles = Vec(NTILES) { new ioTileLink() }
val mem = new ioMemHub val mem = new ioMem
} }
val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_))
@ -427,12 +426,12 @@ class CoherenceHubNoDir extends CoherenceHub {
// Reply to initial requestor // Reply to initial requestor
// Forward memory responses from mem to tile // Forward memory responses from mem to tile
val idx = io.mem.resp.tag val idx = io.mem.resp.bits.tag
for( j <- 0 until NTILES ) { for( j <- 0 until NTILES ) {
io.tiles(j).xact_rep.bits.t_type := getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx)) io.tiles(j).xact_rep.bits.t_type := getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx))
io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx) io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx)
io.tiles(j).xact_rep.bits.global_xact_id := idx io.tiles(j).xact_rep.bits.global_xact_id := idx
io.tiles(j).xact_rep.bits.data := io.mem.resp.data io.tiles(j).xact_rep.bits.data := io.mem.resp.bits.data
io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && (io.mem.resp.valid || send_x_rep_ack_arr.read(idx)) io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && (io.mem.resp.valid || send_x_rep_ack_arr.read(idx))
} }
// If there were a ready signal due to e.g. intervening network use: // If there were a ready signal due to e.g. intervening network use:

View File

@ -182,7 +182,7 @@ object Constants
val NTILES = 1 val NTILES = 1
val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8
val TILE_ID_BITS = 1 val TILE_ID_BITS = 1
val TILE_XACT_ID_BITS = 1 // log2(NMSHR) val TILE_XACT_ID_BITS = log2up(NMSHR)+2
val GLOBAL_XACT_ID_BITS = 4 val GLOBAL_XACT_ID_BITS = 4
val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS
@ -201,7 +201,7 @@ object Constants
val MEM_TAG_BITS = 4 val MEM_TAG_BITS = 4
val MEM_DATA_BITS = 128 val MEM_DATA_BITS = 128
val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS
require(MEM_TAG_BITS >= max(log2up(NMSHR)+1, GLOBAL_XACT_ID_BITS)) require(MEM_TAG_BITS >= max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS))
val DTLB_ENTRIES = 8; val DTLB_ENTRIES = 8;
val ITLB_ENTRIES = 8; val ITLB_ENTRIES = 8;

View File

@ -26,7 +26,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component
val io = new Bundle { val io = new Bundle {
val host = new ioHost(w) val host = new ioHost(w)
val cpu = Vec(ncores) { new ioHTIF().flip() } val cpu = Vec(ncores) { new ioHTIF().flip() }
val mem = new ioMem val mem = new ioTileLink
} }
val short_request_bits = 64 val short_request_bits = 64
@ -93,11 +93,11 @@ class rocketHTIF(w: Int, ncores: Int) extends Component
} }
val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES)))
when (state === state_mem_req && io.mem.req_rdy) { when (state === state_mem_req && io.mem.xact_init.ready) {
state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata)
} }
when (state === state_mem_wdata && io.mem.req_data_rdy || when (state === state_mem_wdata && io.mem.xact_init_data.ready ||
state === state_mem_rdata && io.mem.resp_val) { state === state_mem_rdata && io.mem.xact_rep.valid) {
when (mem_cnt.andR) { when (mem_cnt.andR) {
state := state_tx state := state_tx
} }
@ -112,16 +112,17 @@ class rocketHTIF(w: Int, ncores: Int) extends Component
var mem_req_data: Bits = null var mem_req_data: Bits = null
for (i <- 0 until MEM_DATA_BITS/short_request_bits) { for (i <- 0 until MEM_DATA_BITS/short_request_bits) {
val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits)))
packet_ram.write(idx, io.mem.resp_data((i+1)*short_request_bits-1, i*short_request_bits), packet_ram.write(idx, io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, i*short_request_bits),
state === state_mem_rdata && io.mem.resp_val) state === state_mem_rdata && io.mem.xact_rep.valid)
mem_req_data = Cat(packet_ram.read(idx), mem_req_data) mem_req_data = Cat(packet_ram.read(idx), mem_req_data)
} }
io.mem.req_val := state === state_mem_req io.mem.xact_init.valid := state === state_mem_req
io.mem.req_rw := cmd === cmd_writemem io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_WRITE_UNCACHED, X_READ_UNCACHED)
io.mem.req_addr := addr >> UFix(OFFSET_BITS-3) io.mem.xact_init.bits.has_data := cmd === cmd_writemem
io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3)
io.mem.req_data_val := state === state_mem_wdata io.mem.xact_init_data.valid:= state === state_mem_wdata
io.mem.req_data_bits := mem_req_data io.mem.xact_init_data.bits.data := mem_req_data
pcr_done := Bool(false) pcr_done := Bool(false)
val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) }

View File

@ -20,7 +20,7 @@ class ioImem(view: List[String] = null) extends Bundle (view)
class ioRocketICache extends Bundle() class ioRocketICache extends Bundle()
{ {
val cpu = new ioImem(); val cpu = new ioImem();
val mem = new ioMem val mem = new ioTileLink
} }
// basic direct mapped instruction cache // basic direct mapped instruction cache
@ -75,7 +75,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component {
// refill counter // refill counter
val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits));
when (io.mem.resp_val) { when (io.mem.xact_rep.valid) {
refill_count := refill_count + UFix(1); refill_count := refill_count + UFix(1);
} }
@ -84,7 +84,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component {
val tag_addr = val tag_addr =
Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb), Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb),
io.cpu.req_idx(indexmsb,indexlsb)).toUFix; io.cpu.req_idx(indexmsb,indexlsb)).toUFix;
val tag_we = (state === s_refill_wait) && io.mem.resp_val; val tag_we = (state === s_refill_wait) && io.mem.xact_rep.valid;
val data_addr = val data_addr =
Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count),
io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix;
@ -112,10 +112,10 @@ class rocketICache(sets: Int, assoc: Int) extends Component {
val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb))
// data array // data array
val data_array = Mem(sets*REFILL_CYCLES){ io.mem.resp_data } val data_array = Mem(sets*REFILL_CYCLES){ io.mem.xact_rep.bits.data }
data_array.setReadLatency(1); data_array.setReadLatency(1);
data_array.setTarget('inst); data_array.setTarget('inst);
val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) val data_out = data_array.rw(data_addr, io.mem.xact_rep.bits.data, io.mem.xact_rep.valid && repl_me)
data_mux.io.sel(i) := hit data_mux.io.sel(i) := hit
data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0);
@ -128,10 +128,11 @@ class rocketICache(sets: Int, assoc: Int) extends Component {
io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit;
rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit);
io.cpu.resp_data := data_mux.io.out io.cpu.resp_data := data_mux.io.out
io.mem.req_val := (state === s_request); io.mem.xact_init.valid := (state === s_request)
io.mem.req_rw := Bool(false) io.mem.xact_init.bits.t_type := X_READ_UNCACHED
io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix io.mem.xact_init.bits.has_data := Bool(false)
io.mem.req_data_val := Bool(false) io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix
io.mem.xact_init_data.valid := Bool(false)
// control state machine // control state machine
switch (state) { switch (state) {
@ -148,19 +149,19 @@ class rocketICache(sets: Int, assoc: Int) extends Component {
} }
is (s_request) is (s_request)
{ {
when (io.mem.req_rdy) { when (io.mem.xact_init.ready) {
state := s_refill_wait; state := s_refill_wait;
} }
} }
is (s_refill_wait) { is (s_refill_wait) {
when (io.mem.resp_val) { when (io.mem.xact_rep.valid) {
state := s_refill; state := s_refill;
} }
} }
is (s_refill) { is (s_refill) {
when (io.mem.resp_val && (~refill_count === UFix(0))) { when (io.mem.xact_rep.valid && refill_count.andR) {
state := s_ready; state := s_ready;
} }
} }
} }
} }

View File

@ -6,8 +6,8 @@ import Constants._;
import scala.math._; import scala.math._;
class ioIPrefetcher extends Bundle() { class ioIPrefetcher extends Bundle() {
val icache = new ioMem().flip val icache = new ioTileLink().flip
val mem = new ioMem val mem = new ioTileLink
val invalidate = Bool(INPUT) val invalidate = Bool(INPUT)
} }
@ -18,22 +18,23 @@ class rocketIPrefetcher extends Component() {
val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() };
val state = Reg(resetVal = s_invalid); val state = Reg(resetVal = s_invalid);
val demand_miss = io.icache.req_val & io.icache.req_rdy; val demand_miss = io.icache.xact_init.valid && io.icache.xact_init.ready
val prefetch_addr = Reg() { UFix(width = io.icache.req_addr.width) }; val prefetch_addr = Reg() { UFix(width = io.icache.xact_init.bits.address.width) };
when (demand_miss) { prefetch_addr := io.icache.req_addr + UFix(1); } val addr_match = (prefetch_addr === io.icache.xact_init.bits.address);
val addr_match = (prefetch_addr === io.icache.req_addr);
val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; val hit = (state != s_invalid) & (state != s_req_wait) & addr_match;
val prefetch_miss = io.icache.xact_init.valid && !hit
when (demand_miss) { prefetch_addr := io.icache.xact_init.bits.address + UFix(1); }
io.icache.req_rdy := io.mem.req_rdy; io.icache.xact_init.ready := io.mem.xact_init.ready
val ip_mem_req_rdy = io.mem.req_rdy & ~(io.icache.req_val & ~hit); val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss
val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0)
io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait)
io.mem.req_rw := Bool(false) io.mem.xact_init.bits.t_type := X_READ_UNCACHED
io.mem.req_tag := Mux(io.icache.req_val && !hit, UFix(0), UFix(1)) io.mem.xact_init.bits.has_data := Bool(false)
io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1))
io.mem.req_data_val := Bool(false) io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr);
io.mem.xact_init_data.valid := Bool(false)
val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt));
when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); }
@ -45,11 +46,11 @@ class rocketIPrefetcher extends Component() {
val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid;
forward := (demand_miss & hit | forward & ~forward_done); forward := (demand_miss & hit | forward & ~forward_done);
io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid); io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid)
io.icache.resp_data := Mux(forward, pdq.io.deq.bits, io.mem.resp_data); io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data)
pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false))
pdq.io.enq.bits := io.mem.resp_data; pdq.io.enq.bits := io.mem.xact_rep.bits.data
pdq.io.enq.valid := ip_mem_resp_val.toBool; pdq.io.enq.valid := ip_mem_resp_val.toBool;
pdq.io.deq.ready := forward; pdq.io.deq.ready := forward;

View File

@ -133,12 +133,6 @@ class DataArrayArrayReq extends Bundle {
val way_en = Bits(width = NWAYS) val way_en = Bits(width = NWAYS)
} }
class MemReq extends Bundle {
val rw = Bool()
val addr = UFix(width = PADDR_BITS-OFFSET_BITS)
val tag = Bits(width = MEM_TAG_BITS)
}
class WritebackReq extends Bundle { class WritebackReq extends Bundle {
val ppn = Bits(width = TAG_BITS) val ppn = Bits(width = TAG_BITS)
val idx = Bits(width = IDX_BITS) val idx = Bits(width = IDX_BITS)
@ -182,7 +176,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence {
val way_oh = Bits(NWAYS, OUTPUT) val way_oh = Bits(NWAYS, OUTPUT)
val mem_resp_val = Bool(INPUT) val mem_resp_val = Bool(INPUT)
val mem_req = (new ioDecoupled) { new MemReq() }.flip val mem_req = (new ioDecoupled) { new TransactionInit }.flip
val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip
val replay = (new ioDecoupled) { new Replay() }.flip val replay = (new ioDecoupled) { new Replay() }.flip
} }
@ -257,10 +251,10 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence {
io.meta_req.bits.way_en := way_oh_ io.meta_req.bits.way_en := way_oh_
io.mem_req.valid := valid && !requested io.mem_req.valid := valid && !requested
//io.mem_req.bits.itm := next_dirty io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_READ_EXCLUSIVE, X_READ_SHARED)
io.mem_req.bits.rw := Bool(false) io.mem_req.bits.has_data := Bool(false)
io.mem_req.bits.addr := Cat(ppn, idx_).toUFix io.mem_req.bits.address := Cat(ppn, idx_).toUFix
io.mem_req.bits.tag := Bits(id) io.mem_req.bits.tile_xact_id := Bits(id)
io.replay.valid := rpq.io.deq.valid && refilled io.replay.valid := rpq.io.deq.valid && refilled
io.replay.bits.idx := idx_ io.replay.bits.idx := idx_
@ -287,7 +281,7 @@ class MSHRFile extends Component {
val fence_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT)
val mem_req = (new ioDecoupled) { new MemReq() }.flip() val mem_req = (new ioDecoupled) { new TransactionInit }.flip()
val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip()
val replay = (new ioDecoupled) { new Replay() }.flip() val replay = (new ioDecoupled) { new Replay() }.flip()
} }
@ -296,7 +290,7 @@ class MSHRFile extends Component {
val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) }
val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) }
val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() }
val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit }
val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val replay_arb = (new Arbiter(NMSHR)) { new Replay() }
val alloc_arb = (new Arbiter(NMSHR)) { Bool() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() }
@ -421,9 +415,9 @@ class WritebackUnit extends Component {
val req = (new ioDecoupled) { new WritebackReq() } val req = (new ioDecoupled) { new WritebackReq() }
val data_req = (new ioDecoupled) { new DataArrayArrayReq() }.flip() val data_req = (new ioDecoupled) { new DataArrayArrayReq() }.flip()
val data_resp = Bits(MEM_DATA_BITS, INPUT) val data_resp = Bits(MEM_DATA_BITS, INPUT)
val refill_req = (new ioDecoupled) { new MemReq() } val refill_req = (new ioDecoupled) { new TransactionInit }
val mem_req = (new ioDecoupled) { new MemReq() }.flip() val mem_req = (new ioDecoupled) { new TransactionInit }.flip
val mem_req_data = (new ioDecoupled) { Bits(width = MEM_DATA_BITS) }.flip() val mem_req_data = (new ioDecoupled) { new TransactionInitData }.flip
} }
val valid = Reg(resetVal = Bool(false)) val valid = Reg(resetVal = Bool(false))
@ -449,12 +443,13 @@ class WritebackUnit extends Component {
val wb_req_val = io.req.valid && !valid val wb_req_val = io.req.valid && !valid
io.refill_req.ready := io.mem_req.ready && !wb_req_val io.refill_req.ready := io.mem_req.ready && !wb_req_val
io.mem_req.valid := io.refill_req.valid || wb_req_val io.mem_req.valid := io.refill_req.valid || wb_req_val
io.mem_req.bits.rw := wb_req_val io.mem_req.bits.t_type := Mux(wb_req_val, X_WRITE_UNCACHED, io.refill_req.bits.t_type)
io.mem_req.bits.addr := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.addr) io.mem_req.bits.has_data := wb_req_val
io.mem_req.bits.tag := io.refill_req.bits.tag io.mem_req.bits.address := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.address)
io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id)
io.mem_req_data.valid := data_req_fired io.mem_req_data.valid := data_req_fired
io.mem_req_data.bits := io.data_resp io.mem_req_data.bits.data := io.data_resp
} }
class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{
@ -680,7 +675,7 @@ abstract class HellaCache extends Component {
class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
val io = new Bundle { val io = new Bundle {
val cpu = new ioDmem() val cpu = new ioDmem()
val mem = new ioMem val mem = new ioTileLink
} }
val lines = 1 << IDX_BITS val lines = 1 << IDX_BITS
@ -749,9 +744,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data)
// refill counter // refill counter
val mem_resp_type = io.mem.xact_rep.bits.t_type
val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_READ_SHARED || mem_resp_type === X_READ_EXCLUSIVE)
val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES)))
val rr_count_next = rr_count + UFix(1) val rr_count_next = rr_count + UFix(1)
when (io.mem.resp_val) { rr_count := rr_count_next } when (refill_val) { rr_count := rr_count_next }
val misaligned = val misaligned =
(((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) ||
@ -806,19 +803,19 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
val needs_writeback = needsWriteback(meta_wb_mux.state) val needs_writeback = needsWriteback(meta_wb_mux.state)
// refill response // refill response
val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) val block_during_refill = !refill_val && (rr_count != UFix(0))
data_arb.io.in(0).bits.inner_req.offset := rr_count data_arb.io.in(0).bits.inner_req.offset := rr_count
data_arb.io.in(0).bits.inner_req.rw := !block_during_refill data_arb.io.in(0).bits.inner_req.rw := !block_during_refill
data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8)
data_arb.io.in(0).bits.inner_req.data := io.mem.resp_data data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data
data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill data_arb.io.in(0).valid := refill_val || block_during_refill
// load hits // load hits
data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb)
data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb)
data_arb.io.in(4).bits.inner_req.rw := Bool(false) data_arb.io.in(4).bits.inner_req.rw := Bool(false)
data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care
data_arb.io.in(4).bits.inner_req.data := io.mem.resp_data // don't care data_arb.io.in(4).bits.inner_req.data := io.mem.xact_rep.bits.data // don't care
data_arb.io.in(4).valid := io.cpu.req_val && req_read data_arb.io.in(4).valid := io.cpu.req_val && req_read
data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check
val early_load_nack = req_read && !data_arb.io.in(4).ready val early_load_nack = req_read && !data_arb.io.in(4).ready
@ -884,8 +881,8 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
mshr.io.req_type := r_cpu_req_type mshr.io.req_type := r_cpu_req_type
mshr.io.req_sdq_id := replayer.io.sdq_id mshr.io.req_sdq_id := replayer.io.sdq_id
mshr.io.req_way_oh := replaced_way_oh mshr.io.req_way_oh := replaced_way_oh
mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0))
mshr.io.mem_resp_tag := io.mem.resp_tag mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id
mshr.io.mem_req <> wb.io.refill_req mshr.io.mem_req <> wb.io.refill_req
mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.meta_req <> meta_arb.io.in(1)
mshr.io.replay <> replayer.io.replay mshr.io.replay <> replayer.io.replay
@ -968,14 +965,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
io.cpu.resp_type := loadgen.io.typ io.cpu.resp_type := loadgen.io.typ
io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data := loadgen.io.dout
io.cpu.resp_data_subword := loadgen.io.r_dout_subword io.cpu.resp_data_subword := loadgen.io.r_dout_subword
wb.io.mem_req.ready := io.mem.req_rdy io.mem.xact_init <> wb.io.mem_req
io.mem.req_val := wb.io.mem_req.valid io.mem.xact_init_data <> wb.io.mem_req_data
io.mem.req_rw := wb.io.mem_req.bits.rw
io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix
io.mem.req_addr := wb.io.mem_req.bits.addr
io.mem.req_data_val := wb.io.mem_req_data.valid
wb.io.mem_req_data.ready := io.mem.req_data_rdy
io.mem.req_data_bits := wb.io.mem_req_data.bits
} }

View File

@ -10,7 +10,7 @@ class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle
val deq = new ioDecoupled()(data).flip val deq = new ioDecoupled()(data).flip
} }
class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) extends Component class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component
{ {
val io = new ioQueue(flushable)(data) val io = new ioQueue(flushable)(data)
@ -50,6 +50,6 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext
} }
io.deq.valid := maybe_full || enq_ptr != deq_ptr io.deq.valid := maybe_full || enq_ptr != deq_ptr
io.enq.ready := !maybe_full || enq_ptr != deq_ptr io.enq.ready := !maybe_full || enq_ptr != deq_ptr || (if (pipe) io.deq.ready else Bool(false))
io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr)
} }

View File

@ -7,7 +7,7 @@ import Constants._;
class ioTop(htif_width: Int) extends Bundle { class ioTop(htif_width: Int) extends Bundle {
val debug = new ioDebug(); val debug = new ioDebug();
val host = new ioHost(htif_width); val host = new ioHost(htif_width);
val mem = new ioMem(); val mem = new ioMem
} }
class Top() extends Component { class Top() extends Component {
@ -21,23 +21,39 @@ class Top() extends Component {
val icache_pf = new rocketIPrefetcher(); val icache_pf = new rocketIPrefetcher();
val dcache = new HellaCacheUniproc(); val dcache = new HellaCacheUniproc();
val arbiter = new rocketMemArbiter(4); val arbiter = new rocketMemArbiter(3 + (if (HAVE_VEC) 1 else 0));
arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(0) <> dcache.io.mem
arbiter.io.requestor(1) <> icache_pf.io.mem arbiter.io.requestor(1) <> icache_pf.io.mem
arbiter.io.requestor(3) <> htif.io.mem arbiter.io.requestor(2) <> htif.io.mem
arbiter.io.mem <> io.mem
val hub = new CoherenceHubNull
// connect tile to hub (figure out how to do this more compactly)
val xact_init_q = (new queue(2)) { new TransactionInit }
xact_init_q.io.enq <> arbiter.io.mem.xact_init
xact_init_q.io.deq <> hub.io.tile.xact_init
val xact_init_data_q = (new queue(2)) { new TransactionInitData }
xact_init_data_q.io.enq <> arbiter.io.mem.xact_init_data
xact_init_data_q.io.deq <> hub.io.tile.xact_init_data
val xact_rep_q = (new queue(1, pipe = true)) { new TransactionReply }
xact_rep_q.io.enq <> hub.io.tile.xact_rep
xact_rep_q.io.deq <> arbiter.io.mem.xact_rep
// connect hub to memory
val mem_req_q = (new queue(2)) { new MemReqCmd }
mem_req_q.io.enq <> hub.io.mem.req_cmd
mem_req_q.io.deq <> io.mem.req_cmd
val mem_req_data_q = (new queue(2)) { new MemData }
mem_req_data_q.io.enq <> hub.io.mem.req_data
mem_req_data_q.io.deq <> io.mem.req_data
hub.io.mem.resp.valid := Reg(io.mem.resp.valid, resetVal = Bool(false))
hub.io.mem.resp.bits := Reg(io.mem.resp.bits)
if (HAVE_VEC) if (HAVE_VEC)
{ {
val vicache = new rocketICache(128, 2); // 128 sets x 2 ways val vicache = new rocketICache(128, 2); // 128 sets x 2 ways
arbiter.io.requestor(2) <> vicache.io.mem arbiter.io.requestor(3) <> vicache.io.mem
cpu.io.vimem <> vicache.io.cpu; cpu.io.vimem <> vicache.io.cpu;
} }
else
{
arbiter.io.requestor(2).req_val := Bool(false)
arbiter.io.requestor(2).req_data_val := Bool(false)
}
htif.io.host <> io.host htif.io.host <> io.host
cpu.io.host <> htif.io.cpu(0); cpu.io.host <> htif.io.cpu(0);

View File

@ -180,6 +180,12 @@ class ioDecoupled[T <: Data]()(data: => T) extends Bundle
val bits = data.asInput val bits = data.asInput
} }
class ioValid[T <: Data]()(data: => T) extends Bundle
{
val valid = Bool(INPUT)
val bits = data.asInput
}
class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle {
val in = Vec(n) { (new ioDecoupled()) { data } } val in = Vec(n) { (new ioDecoupled()) { data } }
val out = (new ioDecoupled()) { data }.flip() val out = (new ioDecoupled()) { data }.flip()