1
0

single-ported coherence master

This commit is contained in:
Henry Cook 2013-01-16 23:57:35 -08:00
parent f7c0152409
commit fb2644760f
2 changed files with 347 additions and 3 deletions

View File

@ -112,11 +112,11 @@ class MasterSourcedIO[T <: Data]()(data: => T) extends DirectionalFIFOIO()(data)
class TileLinkIO(implicit conf: LogicalNetworkConfiguration) extends Bundle { class TileLinkIO(implicit conf: LogicalNetworkConfiguration) extends Bundle {
val xact_init = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInit }} val xact_init = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInit }}
val xact_init_data = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInitData }} val xact_init_data = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInitData }}
val xact_abort = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionAbort }} val xact_abort = (new MasterSourcedIO){(new LogicalNetworkIO){new TransactionAbort }}
val probe_req = (new MasterSourcedIO) {(new LogicalNetworkIO){new ProbeRequest }} val probe_req = (new MasterSourcedIO){(new LogicalNetworkIO){new ProbeRequest }}
val probe_rep = (new ClientSourcedIO){(new LogicalNetworkIO){new ProbeReply }} val probe_rep = (new ClientSourcedIO){(new LogicalNetworkIO){new ProbeReply }}
val probe_rep_data = (new ClientSourcedIO){(new LogicalNetworkIO){new ProbeReplyData }} val probe_rep_data = (new ClientSourcedIO){(new LogicalNetworkIO){new ProbeReplyData }}
val xact_rep = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionReply }} val xact_rep = (new MasterSourcedIO){(new LogicalNetworkIO){new TransactionReply }}
val xact_finish = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionFinish }} val xact_finish = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionFinish }}
override def clone = { new TileLinkIO().asInstanceOf[this.type] } override def clone = { new TileLinkIO().asInstanceOf[this.type] }
} }

View File

@ -524,3 +524,347 @@ class CoherenceHubBroadcast(implicit conf: CoherenceHubConfiguration) extends Co
} }
} }
/**
 * Common port definition for coherence masters.
 *
 * Concrete agents extend this class and drive the ports declared in `io`.
 * The logical-network configuration is taken implicitly and determines how
 * many per-tile `incoherent` flags are exposed.
 */
abstract class CoherenceAgent(implicit conf: LogicalNetworkConfiguration)
    extends Component
    with MasterCoherenceAgent {
  val io = new Bundle {
    // TileLink channel bundle, flipped relative to its declaration in TileLinkIO.
    val network    = (new TileLinkIO).flip
    // One Bool input per tile (conf.nTiles entries).
    val incoherent = Vec(conf.nTiles) { Bool() }.asInput
    // ioMem bundle used for the agent's memory-side traffic.
    val mem        = new ioMem
  }
}
/**
 * Coherence master built from `NGLOBAL_XACTS` transaction trackers that share
 * one TileLink network port and one memory port.
 *
 * What this class wires up:
 *  - fans incoming memory responses out to the tracker whose id equals the
 *    response tag;
 *  - allocates at most one idle tracker per cycle to an incoming transaction
 *    init, or aborts (nacks) the init when it conflicts with an in-flight
 *    transaction, when every tracker is busy, or when the init carries data
 *    and the init-data dependency queue cannot accept another entry;
 *  - arbitrates the trackers' probe requests, transaction replies and memory
 *    requests onto the single shared channels;
 *  - routes probe replies to the tracker named by `global_xact_id` and frees
 *    trackers on `xact_finish`.
 *
 * The two dependency queues record, in arrival order, which tracker owns the
 * next block of data on the (header-less) data channels.
 */
class L2CoherenceAgent(implicit conf: CoherenceHubConfiguration) extends CoherenceAgent()(conf.ln)
{
  implicit val lnConf = conf.ln
  val co = conf.co
  val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_))
  val p_rep_data_dep_q = (new Queue(NGLOBAL_XACTS)){new TrackerDependency} // depth must be >= NPRIMARY
  val x_init_data_dep_q = (new Queue(NGLOBAL_XACTS)){new TrackerDependency} // depth should be >= NPRIMARY

  // Broadcast the incoherent mask and memory response payload to every tracker;
  // only the tracker whose index matches the response tag sees it as valid.
  for( i <- 0 until NGLOBAL_XACTS ) {
    val t = trackerList(i)
    t.io.tile_incoherent := io.incoherent.toBits
    t.io.mem_resp.valid := io.mem.resp.valid && (io.mem.resp.bits.tag === UFix(i))
    t.io.mem_resp.bits := io.mem.resp.bits
  }
  io.mem.resp.ready := trackerList.map(_.io.mem_resp.ready).reduce(_||_)

  // Handle transaction initiation requests
  // Only one allocation per cycle
  // Init requests may or may not have data
  val x_init = io.network.xact_init
  val x_init_data = io.network.xact_init_data
  val x_abort = io.network.xact_abort
  val x_dep_deq = x_init_data_dep_q.io.deq
  val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() }
  val abort_state = Reg(resetVal = s_idle)
  val abort_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES)))
  val any_conflict = trackerList.map(_.io.has_conflict).reduce(_||_)
  val all_busy = trackerList.map(_.io.busy).reduce(_&&_)
  val want_to_abort = x_init.valid && (any_conflict || all_busy || (!x_init_data_dep_q.io.enq.ready && co.messageHasData(x_init.bits.payload)))

  // The arbiter picks one non-busy tracker; a tracker only sees the init as
  // valid when the arbiter grants it and no abort is pending or in progress.
  val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() }
  for( i <- 0 until NGLOBAL_XACTS ) {
    alloc_arb.io.in(i).valid := !trackerList(i).io.busy
    trackerList(i).io.x_init.bits := x_init.bits
    trackerList(i).io.x_init.valid := (abort_state === s_idle) && !want_to_abort && x_init.valid && alloc_arb.io.in(i).ready

    trackerList(i).io.x_init_data.bits := x_init_data.bits
    trackerList(i).io.x_init_data.valid := x_init_data.valid
    trackerList(i).io.x_init_data_dep.bits := x_dep_deq.bits
    trackerList(i).io.x_init_data_dep.valid := x_dep_deq.valid
  }
  val pop_x_init = trackerList.map(_.io.x_init.ready).reduce(_||_)
  // The init is consumed either by a tracker accepting it or by the abort being sent.
  x_init.ready := (x_abort.valid && x_abort.ready) || pop_x_init
  // While draining an aborted init, its data beats are accepted and discarded.
  x_init_data.ready := (abort_state === s_abort_drain) || trackerList.map(_.io.x_init_data.ready).reduce(_||_)
  // Record which tracker accepted an init that carries data.
  x_init_data_dep_q.io.enq.valid := pop_x_init && co.messageHasData(x_init.bits.payload) && (abort_state === s_idle)
  x_init_data_dep_q.io.enq.bits.global_xact_id := OHToUFix(trackerList.map(_.io.x_init.ready))
  x_dep_deq.ready := trackerList.map(_.io.x_init_data_dep.ready).reduce(_||_)

  alloc_arb.io.out.ready := x_init.valid

  // Nack conflicting transaction init attempts
  x_abort.bits.header.dst := x_init.bits.header.src
  x_abort.bits.payload.tile_xact_id := x_init.bits.payload.tile_xact_id
  x_abort.valid := Bool(false)
  switch(abort_state) {
    is(s_idle) {
      when(want_to_abort) {
        // Inits with data must have their data beats drained before the abort is sent.
        abort_state := Mux( co.messageHasData(x_init.bits.payload), s_abort_drain, s_abort_send)
      }
    }
    is(s_abort_drain) { // x_init_data.ready is held high while in this state
      when(x_init_data.valid) {
        abort_cnt := abort_cnt + UFix(1)
        // Terminal count matches XactTracker.doMemReqWrite (REFILL_CYCLES-1).
        // The counter is restarted explicitly so that the next drain starts
        // from zero even when REFILL_CYCLES is not a power of two.
        when(abort_cnt === UFix(REFILL_CYCLES-1)) {
          abort_cnt := UFix(0)
          abort_state := s_abort_send
        }
      }
    }
    is(s_abort_send) { // nothing is dequeued for now
      x_abort.valid := Bool(true)
      when(x_abort.ready) { // raises x_init.ready
        abort_state := s_idle
      }
    }
  }

  // Handle probe request generation
  val p_req_arb = (new Arbiter(NGLOBAL_XACTS)){(new LogicalNetworkIO){ new ProbeRequest }}
  for( i <- 0 until NGLOBAL_XACTS ) {
    val t = trackerList(i).io
    p_req_arb.io.in(i).bits := t.p_req.bits
    p_req_arb.io.in(i).valid := t.p_req.valid
    t.p_req.ready := p_req_arb.io.in(i).ready
  }
  io.network.probe_req <> p_req_arb.io.out

  // Handle probe replies, which may or may not have data
  val p_rep = io.network.probe_rep
  val p_rep_data = io.network.probe_rep_data
  val idx = p_rep.bits.payload.global_xact_id
  p_rep.ready := trackerList.map(_.io.p_rep.ready).reduce(_||_)
  p_rep_data.ready := trackerList.map(_.io.p_rep_data.ready).reduce(_||_)
  // Remember which tracker the upcoming probe-reply data belongs to.
  p_rep_data_dep_q.io.enq.valid := p_rep.valid && co.messageHasData(p_rep.bits.payload)
  p_rep_data_dep_q.io.enq.bits.global_xact_id := p_rep.bits.payload.global_xact_id
  p_rep_data_dep_q.io.deq.ready := trackerList.map(_.io.p_rep_data_dep.ready).reduce(_||_)
  for( i <- 0 until NGLOBAL_XACTS ) {
    trackerList(i).io.p_rep_data.valid := p_rep_data.valid
    trackerList(i).io.p_rep_data.bits := p_rep_data.bits
    trackerList(i).io.p_rep_data_dep.valid := p_rep_data_dep_q.io.deq.valid
    trackerList(i).io.p_rep_data_dep.bits := p_rep_data_dep_q.io.deq.bits
    // Only the tracker named by the reply's global_xact_id sees it as valid.
    trackerList(i).io.p_rep.valid := p_rep.valid && (idx === UFix(i))
    trackerList(i).io.p_rep.bits := p_rep.bits
  }

  // Reply to initial requestor
  // Forward memory responses from mem to tile or arbitrate to ack
  val x_rep_arb = (new Arbiter(NGLOBAL_XACTS)){(new LogicalNetworkIO){ new TransactionReply }}
  for( i <- 0 until NGLOBAL_XACTS ) {
    val t = trackerList(i).io
    x_rep_arb.io.in(i).bits := t.x_rep.bits
    x_rep_arb.io.in(i).valid := t.x_rep.valid
    t.x_rep.ready := x_rep_arb.io.in(i).ready
  }
  // Default path: the arbiter output drives the reply channel.
  io.network.xact_rep.valid := x_rep_arb.io.out.valid
  io.network.xact_rep.bits := x_rep_arb.io.out.bits
  x_rep_arb.io.out.ready := io.network.xact_rep.ready
  // A valid memory response takes over the reply channel: these later
  // assignments override the arbiter-driven defaults above, selecting the
  // reply of the tracker whose index equals the response tag.
  when(io.mem.resp.valid) {
    io.network.xact_rep.valid := Bool(true)
    io.network.xact_rep.bits := Vec(trackerList.map(_.io.x_rep.bits)){(new LogicalNetworkIO){new TransactionReply}}(io.mem.resp.bits.tag)
    for( i <- 0 until NGLOBAL_XACTS ) {
      trackerList(i).io.x_rep.ready := (io.mem.resp.bits.tag === UFix(i)) && io.network.xact_rep.ready
    }
  }

  // Free finished transactions
  val finish = io.network.xact_finish
  for( i <- 0 until NGLOBAL_XACTS ) {
    trackerList(i).io.free := finish.valid && (finish.bits.payload.global_xact_id === UFix(i))
  }
  finish.ready := Bool(true)

  // Create an arbiter for the one memory port
  // We have to arbitrate between the different trackers' memory requests
  // and once we have picked a request, get the right write data
  val mem_req_cmd_arb = (new Arbiter(NGLOBAL_XACTS)) { new MemReqCmd() }
  val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() }
  for( i <- 0 until NGLOBAL_XACTS ) {
    mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd
    mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data
    mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock
  }
  io.mem.req_cmd <> Queue(mem_req_cmd_arb.io.out)
  io.mem.req_data <> Queue(mem_req_data_arb.io.out)
}
/**
 * Tracks a single in-flight coherence transaction.
 *
 * State machine (see the `switch (state)` below):
 *  - `s_idle`:  waits for `io.x_init.valid`, latches the request and moves to
 *               `s_probe` when any probe flag is set, otherwise to `s_mem`
 *               (always `s_mem` when there is only one tile);
 *  - `s_probe`: issues one probe request per set bit of `p_req_flags` and
 *               counts probe replies down to the last one, then moves to `s_mem`;
 *  - `s_mem`:   performs, in priority order, the probe-reply data write, the
 *               init data write and the memory read, then moves to `s_ack`,
 *               `s_busy` or `s_idle`;
 *  - `s_ack`:   asserts `io.x_rep.valid` until the reply is accepted;
 *  - `s_busy`:  waits for `io.free`.
 *
 * @param id index of this tracker; used as the memory request tag and as the
 *           `global_xact_id` of probe requests and transaction replies, and
 *           compared against dependency-queue entries and memory response tags.
 */
class XactTracker(id: Int)(implicit conf: CoherenceHubConfiguration) extends Component {
  val co = conf.co
  implicit val ln = conf.ln
  val io = new Bundle {
    val x_init = (new FIFOIO){(new LogicalNetworkIO) { new TransactionInit }}.flip
    val x_init_data = (new FIFOIO){(new LogicalNetworkIO) { new TransactionInitData }}.flip
    val p_rep = (new FIFOIO){(new LogicalNetworkIO) { new ProbeReply }}.flip
    val p_rep_data = (new FIFOIO){(new LogicalNetworkIO) { new ProbeReplyData }}.flip
    val free = Bool(INPUT)
    val tile_incoherent = Bits(INPUT, conf.ln.nTiles)
    val p_rep_data_dep = (new FIFOIO) { new TrackerDependency }.flip
    val x_init_data_dep = (new FIFOIO) { new TrackerDependency }.flip
    val mem_resp = (new FIFOIO) { new MemResp }.flip

    val mem_req_cmd = (new FIFOIO) { new MemReqCmd }
    val mem_req_data = (new FIFOIO) { new MemData }
    val mem_req_lock = Bool(OUTPUT)
    val p_req = (new FIFOIO) {(new LogicalNetworkIO) { new ProbeRequest }}
    val x_rep = (new FIFOIO) {(new LogicalNetworkIO) { new TransactionReply }}
    val busy = Bool(OUTPUT)
    val has_conflict = Bool(OUTPUT)
  }

  val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() }
  val state = Reg(resetVal = s_idle)
  val xact = Reg{ new TransactionInit }
  val init_client_id_ = Reg(resetVal = UFix(0, width = log2Up(conf.ln.nTiles)))
  //TODO: Will need id reg for merged release xacts
  val init_sharer_cnt_ = Reg(resetVal = UFix(0, width = log2Up(conf.ln.nTiles)))
  // With a single tile no probes are counted, so a constant zero replaces the register.
  val p_rep_count = if (conf.ln.nTiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2Up(conf.ln.nTiles)))
  val p_req_flags = Reg(resetVal = Bits(0, width = conf.ln.nTiles))
  val x_needs_read = Reg(resetVal = Bool(false))
  val x_init_data_needs_write = Reg(resetVal = Bool(false))
  val p_rep_data_needs_write = Reg(resetVal = Bool(false))
  val x_w_mem_cmd_sent = Reg(resetVal = Bool(false))
  val p_w_mem_cmd_sent = Reg(resetVal = Bool(false))
  val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES)))
  val mem_cnt_next = mem_cnt + UFix(1)
  val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES))

  // Set of tiles to probe for the incoming init: defaults to none, and for
  // multi-tile configurations is every tile that is neither marked incoherent
  // nor excluded by `myflag`.
  val p_req_initial_flags = Bits(width = conf.ln.nTiles)
  p_req_initial_flags := Bits(0)
  if (conf.ln.nTiles > 1) {
    // issue self-probes for uncached read xacts to facilitate I$ coherence
    // TODO: this is hackish; figure out how to do it more systematically
    val probe_self = co match {
      case u: CoherencePolicyWithUncached => u.isUncachedReadTransaction(io.x_init.bits.payload)
      case _ => Bool(false)
    }
    val myflag = Mux(probe_self, Bits(0), UFixToOH(io.x_init.bits.header.src(log2Up(conf.ln.nTiles)-1,0)))
    p_req_initial_flags := ~(io.tile_incoherent | myflag)
  }
  val all_x_reps_require_acks = Bool(true)

  // Default output values; the state machine below overrides them as needed.
  io.busy := state != s_idle
  io.has_conflict := co.isCoherenceConflict(xact.addr, io.x_init.bits.payload.addr) && (state != s_idle)
  io.mem_req_cmd.valid := Bool(false)
  io.mem_req_cmd.bits.rw := Bool(false)
  io.mem_req_cmd.bits.addr := xact.addr
  io.mem_req_cmd.bits.tag := UFix(id)
  io.mem_req_data.valid := Bool(false)
  io.mem_req_data.bits.data := UFix(0)
  io.mem_req_lock := Bool(false)
  io.p_req.valid := Bool(false)
  io.p_req.bits.payload.p_type := co.getProbeRequestType(xact.x_type, UFix(0))
  io.p_req.bits.payload.global_xact_id := UFix(id)
  io.p_req.bits.payload.addr := xact.addr
  io.p_req.bits.header.dst := UFix(0)
  io.x_rep.bits.payload.data := io.mem_resp.bits.data
  io.x_rep.bits.payload.x_type := co.getTransactionReplyType(xact.x_type, init_sharer_cnt_)
  io.x_rep.bits.payload.tile_xact_id := xact.tile_xact_id
  io.x_rep.bits.payload.global_xact_id := UFix(id)
  io.x_rep.bits.payload.require_ack := all_x_reps_require_acks
  io.x_rep.bits.header.dst := init_client_id_
  io.x_rep.valid := (io.mem_resp.valid && (UFix(id) === io.mem_resp.bits.tag))
  io.x_init.ready := Bool(false)
  io.x_init_data.ready := Bool(false)
  io.x_init_data_dep.ready := Bool(false)
  io.p_rep.ready := Bool(false)
  io.p_rep_data.ready := Bool(false)
  io.p_rep_data_dep.ready := Bool(false)
  io.mem_resp.ready := io.x_rep.ready

  switch (state) {
    is(s_idle) {
      when( io.x_init.valid ) {
        xact := io.x_init.bits.payload
        init_client_id_ := io.x_init.bits.header.src
        init_sharer_cnt_ := UFix(conf.ln.nTiles) // TODO: Broadcast only
        x_init_data_needs_write := co.messageHasData(io.x_init.bits.payload)
        x_needs_read := co.needsMemRead(io.x_init.bits.payload.x_type, UFix(0))
        // Start every transaction with no pending probe-data write; the flag
        // is only ever raised by a probe reply that carries data.
        p_rep_data_needs_write := Bool(false)
        p_req_flags := p_req_initial_flags
        mem_cnt := UFix(0)
        p_w_mem_cmd_sent := Bool(false)
        x_w_mem_cmd_sent := Bool(false)
        io.x_init.ready := Bool(true)
        if(conf.ln.nTiles > 1) {
          p_rep_count := PopCount(p_req_initial_flags)
          state := Mux(p_req_initial_flags.orR, s_probe, s_mem)
        } else state := s_mem
      }
    }
    is(s_probe) {
      // Probe the lowest-numbered tile still flagged, clearing its flag once accepted.
      val curr_p_id = PriorityEncoder(p_req_flags)
      when(p_req_flags.orR) {
        io.p_req.valid := Bool(true)
        io.p_req.bits.header.dst := curr_p_id
      }
      when(io.p_req.ready) {
        p_req_flags := p_req_flags & ~(UFixToOH(curr_p_id))
      }
      when(io.p_rep.valid) {
        io.p_rep.ready := Bool(true)
        if(conf.ln.nTiles > 1) p_rep_count := p_rep_count - UFix(1)
        when(p_rep_count === UFix(1)) {
          state := s_mem
        }
        // Set-only: a later reply without data must not clear the flag raised
        // by an earlier reply that did carry data, otherwise the write-back
        // (and its dependency-queue entry) would never be consumed.
        when(co.messageHasData(io.p_rep.bits.payload)) {
          p_rep_data_needs_write := Bool(true)
        }
      }
    }
    is(s_mem) {
      when (p_rep_data_needs_write) {
        doMemReqWrite(io.mem_req_cmd,
                      io.mem_req_data,
                      io.mem_req_lock,
                      io.p_rep_data,
                      p_rep_data_needs_write,
                      p_w_mem_cmd_sent,
                      io.p_rep_data_dep.ready,
                      io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)))
      } .elsewhen (x_init_data_needs_write) {
        doMemReqWrite(io.mem_req_cmd,
                      io.mem_req_data,
                      io.mem_req_lock,
                      io.x_init_data,
                      x_init_data_needs_write,
                      x_w_mem_cmd_sent,
                      io.x_init_data_dep.ready,
                      io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)))
      } .elsewhen (x_needs_read) {
        doMemReqRead(io.mem_req_cmd, x_needs_read)
      } .otherwise {
        state := Mux(co.needsAckReply(xact.x_type, UFix(0)), s_ack,
                   Mux(all_x_reps_require_acks, s_busy, s_idle))
      }
    }
    is(s_ack) {
      io.x_rep.valid := Bool(true)
      when(io.x_rep.ready) { state := Mux(all_x_reps_require_acks, s_busy, s_idle) }
    }
    is(s_busy) { // Nothing left to do but wait for transaction to complete
      when (io.free) {
        state := s_idle
      }
    }
  }

  /**
   * Drives one multi-beat memory write: a single write command followed by
   * `REFILL_CYCLES` data beats taken from `data`.
   *
   * Nothing is issued until this tracker's entry is at the head of the
   * relevant dependency queue.
   *
   * @param req_cmd  memory command port; `rw` is set to write
   * @param req_data memory data port; its bits are driven from `data`'s payload
   * @param lock     asserted while data is available or the command has been sent
   * @param data     source channel of the write data
   * @param trigger  "write still needed" flag; cleared after the final beat
   * @param cmd_sent register recording that the write command was accepted
   * @param pop_dep  raised on the final beat to pop the dependency queue
   * @param at_front_of_dep_queue true when the head dependency entry names this tracker
   */
  def doMemReqWrite[T <: Data](req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: FIFOIO[LogicalNetworkIO[T]], trigger: Bool, cmd_sent: Bool, pop_dep: Bool, at_front_of_dep_queue: Bool) {
    req_cmd.bits.rw := Bool(true)
    req_data.bits := data.bits.payload
    when(req_cmd.ready && req_cmd.valid) {
      cmd_sent := Bool(true)
    }
    when (at_front_of_dep_queue) {
      req_cmd.valid := !cmd_sent && req_data.ready && data.valid
      lock := data.valid || cmd_sent
      when (req_cmd.ready || cmd_sent) {
        req_data.valid := data.valid
        when(req_data.ready) {
          data.ready:= Bool(true)
          when (data.valid) {
            mem_cnt := mem_cnt_next
            when(mem_cnt === UFix(REFILL_CYCLES-1)) {
              // Restart the beat counter explicitly so a second write in the
              // same transaction starts from zero even when REFILL_CYCLES is
              // not a power of two (no reliance on counter wrap-around).
              mem_cnt := UFix(0)
              pop_dep := Bool(true)
              trigger := Bool(false)
            }
          }
        }
      }
    }
  }

  /**
   * Issues a memory read command and clears `trigger` once it is accepted.
   *
   * @param req_cmd memory command port; driven valid with `rw` set to read
   * @param trigger "read still needed" flag; cleared when `req_cmd.ready` is seen
   */
  def doMemReqRead(req_cmd: FIFOIO[MemReqCmd], trigger: Bool) {
    req_cmd.valid := Bool(true)
    req_cmd.bits.rw := Bool(false)
    when(req_cmd.ready) {
      trigger := Bool(false)
    }
  }
}