From ecdfb528c5d8491ff5a645b33d1204d880ddcdef Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Wed, 7 Sep 2016 13:55:22 -0700 Subject: [PATCH 01/10] crossing: refactor AsyncDecoupled to provide AsyncDecoupledCrossing with no clock domain --- src/main/scala/junctions/crossing.scala | 86 ++++++++----------- src/main/scala/junctions/nasti.scala | 45 +++++----- .../scala/rocketchip/DebugTransport.scala | 27 +++--- src/main/scala/uncore/devices/Debug.scala | 30 ++++--- 4 files changed, 91 insertions(+), 97 deletions(-) diff --git a/src/main/scala/junctions/crossing.scala b/src/main/scala/junctions/crossing.scala index 0f71362e..802504c0 100644 --- a/src/main/scala/junctions/crossing.scala +++ b/src/main/scala/junctions/crossing.scala @@ -1,13 +1,13 @@ package junctions import Chisel._ -class Crossing[T <: Data](gen: T, enq_sync: Boolean, deq_sync: Boolean) extends Bundle { +class Crossing[T <: Data](gen: T) extends Bundle { val enq = Decoupled(gen).flip() val deq = Decoupled(gen) - val enq_clock = if (enq_sync) Some(Clock(INPUT)) else None - val deq_clock = if (deq_sync) Some(Clock(INPUT)) else None - val enq_reset = if (enq_sync) Some(Bool(INPUT)) else None - val deq_reset = if (deq_sync) Some(Bool(INPUT)) else None + val enq_clock = Clock(INPUT) + val deq_clock = Clock(INPUT) + val enq_reset = Bool(INPUT) + val deq_reset = Bool(INPUT) } // Output is 1 for one cycle after any edge of 'in' @@ -87,11 +87,11 @@ class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool } class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module { - val io = new Crossing(gen, true, true) + val io = new Crossing(gen) require (sync >= 2) - val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock.get, io.enq_reset.get)) - val sink = Module(new AsyncHandshakeSink (gen, sync, io.deq_clock.get, io.deq_reset.get)) + val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock, io.enq_reset)) + val sink = Module(new AsyncHandshakeSink (gen, sync, io.deq_clock, io.deq_reset)) source.io.enq <> io.enq io.deq <> sink.io.deq @@ -101,50 +101,38 @@ class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module { source.io.pop := sink.io.pop } -class AsyncDecoupledTo[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module { - val io = new Crossing(gen, false, true) +class AsyncScope extends Module { val io = new Bundle } +object AsyncScope { def apply() = Module(new AsyncScope) } - // !!! if depth == 0 { use Handshake } else { use AsyncFIFO } - val crossing = Module(new AsyncHandshake(gen, sync)).io - crossing.enq_clock.get := clock - crossing.enq_reset.get := reset - crossing.enq <> io.enq - crossing.deq_clock.get := io.deq_clock.get - crossing.deq_reset.get := io.deq_reset.get - io.deq <> crossing.deq -} - -object AsyncDecoupledTo { - // source is in our clock domain, output is in the 'to' clock domain - def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = { - val to = Module(new AsyncDecoupledTo(source.bits, depth, sync)) - to.io.deq_clock.get := to_clock - to.io.deq_reset.get := to_reset - to.io.enq <> source - to.io.deq +object AsyncDecoupledCrossing +{ + // takes from_source from the 'from' clock domain and puts it into the 'to' clock domain + def apply[T <: Data](from_clock: Clock, from_reset: Bool, from_source: DecoupledIO[T], to_clock: Clock, to_reset: Bool, depth: Int = 3, sync: Int = 2): DecoupledIO[T] = { + // !!! if depth == 0 { use Handshake } else { use AsyncFIFO } + val crossing = Module(new AsyncHandshake(from_source.bits, sync)).io + crossing.enq_clock := from_clock + crossing.enq_reset := from_reset + crossing.enq <> from_source + crossing.deq_clock := to_clock + crossing.deq_reset := to_reset + crossing.deq } } -class AsyncDecoupledFrom[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module { - val io = new Crossing(gen, true, false) - - // !!! if depth == 0 { use Handshake } else { use AsyncFIFO } - val crossing = Module(new AsyncHandshake(gen, sync)).io - crossing.enq_clock.get := io.enq_clock.get - crossing.enq_reset.get := io.enq_reset.get - crossing.enq <> io.enq - crossing.deq_clock.get := clock - crossing.deq_reset.get := reset - io.deq <> crossing.deq -} - -object AsyncDecoupledFrom { - // source is in the 'from' clock domain, output is in our clock domain - def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = { - val from = Module(new AsyncDecoupledFrom(source.bits, depth, sync)) - from.io.enq_clock.get := from_clock - from.io.enq_reset.get := from_reset - from.io.enq <> source - from.io.deq +object AsyncDecoupledTo +{ + // takes source from your clock domain and puts it into the 'to' clock domain + def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 3, sync: Int = 2): DecoupledIO[T] = { + val scope = AsyncScope() + AsyncDecoupledCrossing(scope.clock, scope.reset, source, to_clock, to_reset, depth, sync) + } +} + +object AsyncDecoupledFrom +{ + // takes from_source from the 'from' clock domain and puts it into your clock domain + def apply[T <: Data](from_clock: Clock, from_reset: Bool, from_source: DecoupledIO[T], depth: Int = 3, sync: Int = 2): DecoupledIO[T] = { + val scope = AsyncScope() + AsyncDecoupledCrossing(from_clock, from_reset, from_source, scope.clock, scope.reset, depth, sync) } } diff --git a/src/main/scala/junctions/nasti.scala b/src/main/scala/junctions/nasti.scala index 41251ac3..3ae1fbc2 100644 --- a/src/main/scala/junctions/nasti.scala +++ b/src/main/scala/junctions/nasti.scala @@ -706,32 +706,33 @@ class NastiMemoryDemux(nRoutes: Int)(implicit p: Parameters) extends NastiModule } } +object AsyncNastiCrossing { + // takes from_source from the 'from' clock domain to the 'to' clock domain + def apply(from_clock: Clock, from_reset: Bool, from_source: NastiIO, to_clock: Clock, to_reset: Bool, depth: Int = 3, sync: Int = 2) = { + val to_sink = Wire(new NastiIO()(from_source.p)) + + to_sink.aw <> AsyncDecoupledCrossing(from_clock, from_reset, from_source.aw, to_clock, to_reset, depth, sync) + to_sink.ar <> AsyncDecoupledCrossing(from_clock, from_reset, from_source.ar, to_clock, to_reset, depth, sync) + to_sink.w <> AsyncDecoupledCrossing(from_clock, from_reset, from_source.w, to_clock, to_reset, depth, sync) + from_source.b <> AsyncDecoupledCrossing(to_clock, to_reset, to_sink.b, from_clock, from_reset, depth, sync) + from_source.r <> AsyncDecoupledCrossing(to_clock, to_reset, to_sink.r, from_clock, from_reset, depth, sync) + + to_sink // is now to_source + } +} + object AsyncNastiTo { - // source(master) is in our clock domain, output is in the 'to' clock domain - def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = { - val sink = Wire(new NastiIO) - - sink.aw <> AsyncDecoupledTo(to_clock, to_reset, source.aw, depth, sync) - sink.ar <> AsyncDecoupledTo(to_clock, to_reset, source.ar, depth, sync) - sink.w <> AsyncDecoupledTo(to_clock, to_reset, source.w, depth, sync) - source.b <> AsyncDecoupledFrom(to_clock, to_reset, sink.b, depth, sync) - source.r <> AsyncDecoupledFrom(to_clock, to_reset, sink.r, depth, sync) - - sink + // takes source from your clock domain and puts it into the 'to' clock domain + def apply(to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2): NastiIO = { + val scope = AsyncScope() + AsyncNastiCrossing(scope.clock, scope.reset, source, to_clock, to_reset, depth, sync) } } object AsyncNastiFrom { - // source(master) is in the 'from' clock domain, output is in our clock domain - def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = { - val sink = Wire(new NastiIO) - - sink.aw <> AsyncDecoupledFrom(from_clock, from_reset, source.aw, depth, sync) - sink.ar <> AsyncDecoupledFrom(from_clock, from_reset, source.ar, depth, sync) - sink.w <> AsyncDecoupledFrom(from_clock, from_reset, source.w, depth, sync) - source.b <> AsyncDecoupledTo(from_clock, from_reset, sink.b, depth, sync) - source.r <> AsyncDecoupledTo(from_clock, from_reset, sink.r, depth, sync) - - sink + // takes from_source from the 'from' clock domain and puts it into your clock domain + def apply(from_clock: Clock, from_reset: Bool, from_source: NastiIO, depth: Int = 3, sync: Int = 2): NastiIO = { + val scope = AsyncScope() + AsyncNastiCrossing(from_clock, from_reset, from_source, scope.clock, scope.reset, depth, sync) } } diff --git a/src/main/scala/rocketchip/DebugTransport.scala b/src/main/scala/rocketchip/DebugTransport.scala index 92fd0f29..846a6500 100644 --- a/src/main/scala/rocketchip/DebugTransport.scala +++ b/src/main/scala/rocketchip/DebugTransport.scala @@ -86,19 +86,19 @@ class JtagDTMWithSync(implicit val p: Parameters) extends Module { } else { val req_sync = Module (new AsyncMailbox()) val resp_sync = Module (new AsyncMailbox()) - req_sync.io.enq := jtag_dtm.io.dtm_req - req_sync.io.enq_clock.get := io.jtag.TCK - req_sync.io.enq_reset.get := io.jtag.TRST - req_sync.io.deq_clock.get := clock - req_sync.io.deq_reset.get := reset - dtm_req := req_sync.io.deq + req_sync.io.enq := jtag_dtm.io.dtm_req + req_sync.io.enq_clock := io.jtag.TCK + req_sync.io.enq_reset := io.jtag.TRST + req_sync.io.deq_clock := clock + req_sync.io.deq_reset := reset + dtm_req := req_sync.io.deq - jtag_dtm.io.dtm_resp := resp_sync.io.deq - resp_sync.io.deq_clock.get := io.jtag.TCK - resp_sync.io.deq_reset.get := io.jtag.TRST - resp_sync.io.enq_clock.get := clock - resp_sync.io.enq_reset.get := reset - resp_sync.io.enq := dtm_resp + jtag_dtm.io.dtm_resp := resp_sync.io.deq + resp_sync.io.deq_clock := io.jtag.TCK + resp_sync.io.deq_reset := io.jtag.TRST + resp_sync.io.enq_clock := clock + resp_sync.io.enq_reset := reset + resp_sync.io.enq := dtm_resp } } @@ -121,6 +121,5 @@ class AsyncMailbox extends BlackBox { // this mailbox just has a fixed width of 64 bits, which is enough // for our specific purpose here. - val io = new Crossing(UInt(width=64), true, true) - + val io = new Crossing(UInt(width=64)) } diff --git a/src/main/scala/uncore/devices/Debug.scala b/src/main/scala/uncore/devices/Debug.scala index 7443c9ac..310bdc68 100644 --- a/src/main/scala/uncore/devices/Debug.scala +++ b/src/main/scala/uncore/devices/Debug.scala @@ -982,22 +982,28 @@ class DebugModule ()(implicit val p:cde.Parameters) } +object AsyncDebugBusCrossing { + // takes from_source from the 'from' clock domain to the 'to' clock domain + def apply(from_clock: Clock, from_reset: Bool, from_source: DebugBusIO, to_clock: Clock, to_reset: Bool, depth: Int = 3, sync: Int = 2) = { + val to_sink = Wire(new DebugBusIO()(from_source.p)) + to_sink.req <> AsyncDecoupledCrossing(from_clock, from_reset, from_source.req, to_clock, to_reset, depth, sync) + from_source.resp <> AsyncDecoupledCrossing(to_clock, to_reset, to_sink.resp, from_clock, from_reset, depth, sync) + to_sink // is now to_source + } +} + object AsyncDebugBusFrom { // OutsideClockDomain - def apply(from_clock: Clock, from_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2)(implicit p: Parameters): DebugBusIO = { - val sink = Wire(new DebugBusIO) - sink.req <> AsyncDecoupledFrom(from_clock, from_reset, source.req) - source.resp <> AsyncDecoupledTo(from_clock, from_reset, sink.resp) - sink + // takes from_source from the 'from' clock domain and puts it into your clock domain + def apply(from_clock: Clock, from_reset: Bool, from_source: DebugBusIO, depth: Int = 0, sync: Int = 2): DebugBusIO = { + val scope = AsyncScope() + AsyncDebugBusCrossing(from_clock, from_reset, from_source, scope.clock, scope.reset, depth, sync) } } object AsyncDebugBusTo { // OutsideClockDomain - def apply(to_clock: Clock, to_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2)(implicit p: Parameters): DebugBusIO = { - val sink = Wire(new DebugBusIO) - sink.req <> AsyncDecoupledTo(to_clock, to_reset, source.req) - source.resp <> AsyncDecoupledFrom(to_clock, to_reset, sink.resp) - sink + // takes source from your clock domain and puts it into the 'to' clock domain + def apply(to_clock: Clock, to_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2): DebugBusIO = { + val scope = AsyncScope() + AsyncDebugBusCrossing(scope.clock, scope.reset, source, to_clock, to_reset, depth, sync) } } - - From 8142406d2e5a0138c8b520ecc4048e7a200c3ec2 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 15:34:56 -0700 Subject: [PATCH 02/10] junctions: refactor the Crossing type --- src/main/scala/junctions/crossing.scala | 24 ++++++++++++------- .../scala/rocketchip/DebugTransport.scala | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/main/scala/junctions/crossing.scala b/src/main/scala/junctions/crossing.scala index 802504c0..f0db6f86 100644 --- a/src/main/scala/junctions/crossing.scala +++ b/src/main/scala/junctions/crossing.scala @@ -1,13 +1,19 @@ package junctions import Chisel._ -class Crossing[T <: Data](gen: T) extends Bundle { - val enq = Decoupled(gen).flip() - val deq = Decoupled(gen) - val enq_clock = Clock(INPUT) - val deq_clock = Clock(INPUT) - val enq_reset = Bool(INPUT) - val deq_reset = Bool(INPUT) +class CrossingIO[T <: Data](gen: T) extends Bundle { + // Enqueue clock domain + val enq_clock = Clock(INPUT) + val enq_reset = Bool(INPUT) // synchronously deasserted wrt. enq_clock + val enq = Decoupled(gen).flip() + // Dequeue clock domain + val deq_clock = Clock(INPUT) + val deq_reset = Bool(INPUT) // synchronously deasserted wrt. deq_clock + val deq = Decoupled(gen) +} + +abstract class Crossing[T <: Data] extends Module { + val io: CrossingIO[T] } // Output is 1 for one cycle after any edge of 'in' @@ -86,8 +92,8 @@ class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool } } -class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module { - val io = new Crossing(gen) +class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Crossing[T] { + val io = new CrossingIO(gen) require (sync >= 2) val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock, io.enq_reset)) diff --git a/src/main/scala/rocketchip/DebugTransport.scala b/src/main/scala/rocketchip/DebugTransport.scala index 846a6500..1371a16d 100644 --- a/src/main/scala/rocketchip/DebugTransport.scala +++ b/src/main/scala/rocketchip/DebugTransport.scala @@ -121,5 +121,5 @@ class AsyncMailbox extends BlackBox { // this mailbox just has a fixed width of 64 bits, which is enough // for our specific purpose here. - val io = new Crossing(UInt(width=64)) + val io = new CrossingIO(UInt(width=64)) } From d75f9d6a34d015674bfcff53eafff9d6d538c6b9 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 15:30:09 -0700 Subject: [PATCH 03/10] junctions: add an AsyncQueue --- src/main/scala/junctions/asyncqueue.scala | 88 +++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 src/main/scala/junctions/asyncqueue.scala diff --git a/src/main/scala/junctions/asyncqueue.scala b/src/main/scala/junctions/asyncqueue.scala new file mode 100644 index 00000000..585fa9e9 --- /dev/null +++ b/src/main/scala/junctions/asyncqueue.scala @@ -0,0 +1,88 @@ +// See LICENSE for license details. + +package junctions +import Chisel._ + +object GrayCounter { + def apply(bits: Int, increment: Bool = Bool(true)): UInt = { + val binary = RegInit(UInt(0, width = bits)) + val incremented = binary + increment.asUInt() + binary := incremented + incremented ^ (incremented >> UInt(1)) + } +} + +object AsyncGrayCounter { + def apply(in: UInt, sync: Int): UInt = { + val syncv = RegInit(Vec.fill(sync){UInt(0, width = in.getWidth)}) + syncv.last := in + (syncv.init zip syncv.tail).foreach { case (sink, source) => sink := source } + syncv(0) + } +} + +class AsyncQueueSource[T <: Data](gen: T, depth: Int, sync: Int, clockIn: Clock, resetIn: Bool) + extends Module(_clock = clockIn, _reset = resetIn) { + val bits = log2Ceil(depth) + val io = new Bundle { + // These come from the source domain + val enq = Decoupled(gen).flip() + // These cross to the sink clock domain + val ridx = UInt(INPUT, width = bits+1) + val widx = UInt(OUTPUT, width = bits+1) + val mem = Vec(depth, gen).asOutput + } + + val mem = Reg(Vec(depth, gen)) + val widx = GrayCounter(bits+1, io.enq.fire()) + val ridx = AsyncGrayCounter(io.ridx, sync) + val ready = widx =/= (ridx ^ UInt(depth | depth >> 1)) + + val index = if (depth == 1) UInt(0) else io.widx(bits-1, 0) ^ (io.widx(bits, bits) << (bits-1)) + when (io.enq.fire() && !reset) { mem(index) := io.enq.bits } + io.enq.ready := RegNext(ready, Bool(false)) + io.widx := RegNext(widx, UInt(0)) + io.mem := mem +} + +class AsyncQueueSink[T <: Data](gen: T, depth: Int, sync: Int, clockIn: Clock, resetIn: Bool) + extends Module(_clock = clockIn, _reset = resetIn) { + val bits = log2Ceil(depth) + val io = new Bundle { + // These come from the sink domain + val deq = Decoupled(gen) + // These cross to the source clock domain + val ridx = UInt(OUTPUT, width = bits+1) + val widx = UInt(INPUT, width = bits+1) + val mem = Vec(depth, gen).asInput + } + + val ridx = GrayCounter(bits+1, io.deq.fire()) + val widx = AsyncGrayCounter(io.widx, sync) + val valid = ridx =/= widx + + // The mux is safe because timing analysis ensures ridx has reached the register + // On an ASIC, changes to the unread location cannot affect the selected value + // On an FPGA, only one input changes at a time => mem updates don't cause glitches + // The register only latches when the selected valued is not being written + val index = if (depth == 1) UInt(0) else ridx(bits-1, 0) ^ (ridx(bits, bits) << (bits-1)) + io.deq.bits := RegEnable(io.mem(index), valid && !reset) + io.deq.valid := RegNext(valid, Bool(false)) + io.ridx := RegNext(ridx, UInt(0)) +} + +class AsyncQueue[T <: Data](gen: T, depth: Int = 8, sync: Int = 3) extends Crossing[T] { + require (sync >= 2) + require (depth > 0 && isPow2(depth)) + + val io = new CrossingIO(gen) + val source = Module(new AsyncQueueSource(gen, depth, sync, io.enq_clock, io.enq_reset)) + val sink = Module(new AsyncQueueSink (gen, depth, sync, io.deq_clock, io.deq_reset)) + + source.io.enq <> io.enq + io.deq <> sink.io.deq + + sink.io.mem := source.io.mem + sink.io.widx := source.io.widx + source.io.ridx := sink.io.ridx +} From fe6a67dd0eaac249fb1f40ddfb3a3b31a25a5e49 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 15:26:59 -0700 Subject: [PATCH 04/10] tilelink2: add a RegisterCrossing primitive --- .../scala/uncore/tilelink2/RegField.scala | 13 ++ .../uncore/tilelink2/RegisterCrossing.scala | 130 ++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/main/scala/uncore/tilelink2/RegisterCrossing.scala diff --git a/src/main/scala/uncore/tilelink2/RegField.scala b/src/main/scala/uncore/tilelink2/RegField.scala index 1fac6a84..c3f758fc 100644 --- a/src/main/scala/uncore/tilelink2/RegField.scala +++ b/src/main/scala/uncore/tilelink2/RegField.scala @@ -17,6 +17,12 @@ object RegReadFn // effects must become visible on the cycle after ovalid && oready implicit def apply(x: (Bool, Bool) => (Bool, Bool, UInt)) = new RegReadFn(false, x) + implicit def apply(x: RegisterReadIO[UInt]): RegReadFn = + RegReadFn((ivalid, oready) => { + x.request.valid := ivalid + x.response.ready := oready + (x.request.ready, x.response.valid, x.response.bits) + }) // (ready: Bool) => (valid: Bool, data: UInt) // valid must not combinationally depend on ready // valid must eventually go high without requiring ready to go high @@ -47,6 +53,13 @@ object RegWriteFn // effects must become visible on the cycle after ovalid && oready implicit def apply(x: (Bool, Bool, UInt) => (Bool, Bool)) = new RegWriteFn(false, x) + implicit def apply(x: RegisterWriteIO[UInt]): RegWriteFn = + RegWriteFn((ivalid, oready, data) => { + x.request.valid := ivalid + x.request.bits := data + x.response.ready := oready + (x.request.ready, x.response.valid) + }) // (valid: Bool, data: UInt) => (ready: Bool) // ready may combinationally depend on data (but not valid) // ready must eventually go high without requiring valid to go high diff --git a/src/main/scala/uncore/tilelink2/RegisterCrossing.scala b/src/main/scala/uncore/tilelink2/RegisterCrossing.scala new file mode 100644 index 00000000..36a5606e --- /dev/null +++ b/src/main/scala/uncore/tilelink2/RegisterCrossing.scala @@ -0,0 +1,130 @@ +// See LICENSE for license details. + +package uncore.tilelink2 + +import Chisel._ +import junctions._ + +// A very simple flow control state machine, run in the specified clock domain +class BusyRegisterCrossing(clock: Clock, reset: Bool) + extends Module(_clock = clock, _reset = reset) { + val io = new Bundle { + val progress = Bool(INPUT) + val request_valid = Bool(INPUT) + val response_ready = Bool(INPUT) + val busy = Bool(OUTPUT) + } + + val busy = RegInit(Bool(false)) + when (io.progress) { + busy := Mux(busy, !io.response_ready, io.request_valid) + } + io.busy := busy +} + +// RegField should support connecting to one of these +class RegisterWriteIO[T <: Data](gen: T) extends Bundle { + val request = Decoupled(gen).flip() + val response = Decoupled(Bool()) // ignore .bits +} + +// To turn on/off a domain: +// 1. lower allow on the other side +// 2. wait for inflight traffic to resolve +// 3. turn off the domain +// 4. assert reset in the domain +// 5. turn on the domain +// 6. deassert reset in the domain +// 7. raise allow on the other side + +class RegisterWriteCrossingIO[T <: Data](gen: T) extends Bundle { + // Master clock domain + val master_clock = Clock(INPUT) + val master_reset = Bool(INPUT) + val master_allow = Bool(INPUT) // actually wait for the slave + val master_port = new RegisterWriteIO(gen) + // Slave clock domain + val slave_clock = Clock(INPUT) + val slave_reset = Bool(INPUT) + val slave_allow = Bool(INPUT) // honour requests from the master + val slave_register = gen.asOutput +} + +class RegisterWriteCrossing[T <: Data](gen: T, sync: Int = 3) extends Module { + val io = new RegisterWriteCrossingIO(gen) + // The crossing must only allow one item inflight at a time + val crossing = Module(new AsyncQueue(gen, 1, sync)) + + // We can just randomly reset one-side of a single entry AsyncQueue. + // If the enq side is reset, at worst deq.bits is reassigned from mem(0), which stays fixed. + // If the deq side is reset, at worst the master rewrites mem(0) once, deq.bits stays fixed. + crossing.io.enq_clock := io.master_clock + crossing.io.enq_reset := io.master_reset || !io.master_allow + crossing.io.deq_clock := io.slave_clock + crossing.io.deq_reset := io.slave_reset || !io.slave_allow + + crossing.io.enq.bits := io.master_port.request.bits + io.slave_register := crossing.io.deq.bits + + // If the slave is not operational, just drop the write. + val progress = crossing.io.enq.ready || !io.master_allow + + val reg = Module(new BusyRegisterCrossing(io.master_clock, io.master_reset)) + reg.io.progress := progress + reg.io.request_valid := io.master_port.request.valid + reg.io.response_ready := io.master_port.response.ready + + crossing.io.deq.ready := Bool(true) + crossing.io.enq.valid := io.master_port.request.valid && !reg.io.busy + io.master_port.request.ready := progress && !reg.io.busy + io.master_port.response.valid := progress && reg.io.busy +} + +// RegField should support connecting to one of these +class RegisterReadIO[T <: Data](gen: T) extends Bundle { + val request = Decoupled(Bool()).flip() // ignore .bits + val response = Decoupled(gen) +} + +class RegisterReadCrossingIO[T <: Data](gen: T) extends Bundle { + // Master clock domain + val master_clock = Clock(INPUT) + val master_reset = Bool(INPUT) + val master_allow = Bool(INPUT) // actually wait for the slave + val master_port = new RegisterReadIO(gen) + // Slave clock domain + val slave_clock = Clock(INPUT) + val slave_reset = Bool(INPUT) + val slave_allow = Bool(INPUT) // honour requests from the master + val slave_register = gen.asInput +} + +class RegisterReadCrossing[T <: Data](gen: T, sync: Int = 3) extends Module { + val io = new RegisterReadCrossingIO(gen) + // The crossing must only allow one item inflight at a time + val crossing = Module(new AsyncQueue(gen, 1, sync)) + + // We can just randomly reset one-side of a single entry AsyncQueue. + // If the enq side is reset, at worst deq.bits is reassigned from mem(0), which stays fixed. + // If the deq side is reset, at worst the slave rewrites mem(0) once, deq.bits stays fixed. + crossing.io.enq_clock := io.slave_clock + crossing.io.enq_reset := io.slave_reset || !io.slave_allow + crossing.io.deq_clock := io.master_clock + crossing.io.deq_reset := io.master_reset || !io.master_allow + + crossing.io.enq.bits := io.slave_register + io.master_port.response.bits := crossing.io.deq.bits + + // If the slave is not operational, just repeat the last value we saw. + val progress = crossing.io.deq.valid || !io.master_allow + + val reg = Module(new BusyRegisterCrossing(io.master_clock, io.master_reset)) + reg.io.progress := progress + reg.io.request_valid := io.master_port.request.valid + reg.io.response_ready := io.master_port.response.ready + + io.master_port.response.valid := progress && reg.io.busy + io.master_port.request.ready := progress && !reg.io.busy + crossing.io.deq.ready := io.master_port.request.valid && !reg.io.busy + crossing.io.enq.valid := Bool(true) +} From 3348236320a4576a35ee8666675d9fc260a60d06 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 15:45:52 -0700 Subject: [PATCH 05/10] junctions: remove obsolete Handshaker crossing --- src/main/scala/junctions/crossing.scala | 100 +----------------------- 1 file changed, 4 insertions(+), 96 deletions(-) diff --git a/src/main/scala/junctions/crossing.scala b/src/main/scala/junctions/crossing.scala index f0db6f86..948c3a21 100644 --- a/src/main/scala/junctions/crossing.scala +++ b/src/main/scala/junctions/crossing.scala @@ -16,106 +16,14 @@ abstract class Crossing[T <: Data] extends Module { val io: CrossingIO[T] } -// Output is 1 for one cycle after any edge of 'in' -object AsyncHandshakePulse { - def apply(in: Bool, sync: Int): Bool = { - val syncv = RegInit(Vec.fill(sync+1){Bool(false)}) - syncv.last := in - (syncv.init zip syncv.tail).foreach { case (sink, source) => sink := source } - syncv(0) =/= syncv(1) - } -} - -class AsyncHandshakeSource[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool) - extends Module(_clock = clock, _reset = reset) { - val io = new Bundle { - // These come from the source clock domain - val enq = Decoupled(gen).flip() - // These cross to the sink clock domain - val bits = gen.cloneType.asOutput - val push = Bool(OUTPUT) - val pop = Bool(INPUT) - } - - val ready = RegInit(Bool(true)) - val bits = Reg(gen) - val push = RegInit(Bool(false)) - - io.enq.ready := ready - io.bits := bits - io.push := push - - val pop = AsyncHandshakePulse(io.pop, sync) - assert (!pop || !ready) - - when (pop) { - ready := Bool(true) - } - - when (io.enq.fire()) { - ready := Bool(false) - bits := io.enq.bits - push := !push - } -} - -class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool) - extends Module(_clock = clock, _reset = reset) { - val io = new Bundle { - // These cross to the source clock domain - val bits = gen.cloneType.asInput - val push = Bool(INPUT) - val pop = Bool(OUTPUT) - // These go to the sink clock domain - val deq = Decoupled(gen) - } - - val valid = RegInit(Bool(false)) - val bits = Reg(gen) - val pop = RegInit(Bool(false)) - - io.deq.valid := valid - io.deq.bits := bits - io.pop := pop - - val push = AsyncHandshakePulse(io.push, sync) - assert (!push || !valid) - - when (push) { - valid := Bool(true) - bits := io.bits - } - - when (io.deq.fire()) { - valid := Bool(false) - pop := !pop - } -} - -class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Crossing[T] { - val io = new CrossingIO(gen) - require (sync >= 2) - - val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock, io.enq_reset)) - val sink = Module(new AsyncHandshakeSink (gen, sync, io.deq_clock, io.deq_reset)) - - source.io.enq <> io.enq - io.deq <> sink.io.deq - - sink.io.bits := source.io.bits - sink.io.push := source.io.push - source.io.pop := sink.io.pop -} - class AsyncScope extends Module { val io = new Bundle } object AsyncScope { def apply() = Module(new AsyncScope) } object AsyncDecoupledCrossing { // takes from_source from the 'from' clock domain and puts it into the 'to' clock domain - def apply[T <: Data](from_clock: Clock, from_reset: Bool, from_source: DecoupledIO[T], to_clock: Clock, to_reset: Bool, depth: Int = 3, sync: Int = 2): DecoupledIO[T] = { - // !!! if depth == 0 { use Handshake } else { use AsyncFIFO } - val crossing = Module(new AsyncHandshake(from_source.bits, sync)).io + def apply[T <: Data](from_clock: Clock, from_reset: Bool, from_source: DecoupledIO[T], to_clock: Clock, to_reset: Bool, depth: Int = 8, sync: Int = 3): DecoupledIO[T] = { + val crossing = Module(new AsyncQueue(from_source.bits, depth, sync)).io crossing.enq_clock := from_clock crossing.enq_reset := from_reset crossing.enq <> from_source @@ -128,7 +36,7 @@ object AsyncDecoupledCrossing object AsyncDecoupledTo { // takes source from your clock domain and puts it into the 'to' clock domain - def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 3, sync: Int = 2): DecoupledIO[T] = { + def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 8, sync: Int = 3): DecoupledIO[T] = { val scope = AsyncScope() AsyncDecoupledCrossing(scope.clock, scope.reset, source, to_clock, to_reset, depth, sync) } @@ -137,7 +45,7 @@ object AsyncDecoupledTo object AsyncDecoupledFrom { // takes from_source from the 'from' clock domain and puts it into your clock domain - def apply[T <: Data](from_clock: Clock, from_reset: Bool, from_source: DecoupledIO[T], depth: Int = 3, sync: Int = 2): DecoupledIO[T] = { + def apply[T <: Data](from_clock: Clock, from_reset: Bool, from_source: DecoupledIO[T], depth: Int = 8, sync: Int = 3): DecoupledIO[T] = { val scope = AsyncScope() AsyncDecoupledCrossing(from_clock, from_reset, from_source, scope.clock, scope.reset, depth, sync) } From 44501cdbf80f2f159d41441d3a6e9f4a59e52344 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 15:49:08 -0700 Subject: [PATCH 06/10] crossings: change defaults to sync=3 for safer settling time Make the matching AsyncQueue depth=8 to support full throughput --- src/main/scala/junctions/nasti.scala | 6 +++--- src/main/scala/uncore/devices/Debug.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/junctions/nasti.scala b/src/main/scala/junctions/nasti.scala index 3ae1fbc2..43803153 100644 --- a/src/main/scala/junctions/nasti.scala +++ b/src/main/scala/junctions/nasti.scala @@ -708,7 +708,7 @@ class NastiMemoryDemux(nRoutes: Int)(implicit p: Parameters) extends NastiModule object AsyncNastiCrossing { // takes from_source from the 'from' clock domain to the 'to' clock domain - def apply(from_clock: Clock, from_reset: Bool, from_source: NastiIO, to_clock: Clock, to_reset: Bool, depth: Int = 3, sync: Int = 2) = { + def apply(from_clock: Clock, from_reset: Bool, from_source: NastiIO, to_clock: Clock, to_reset: Bool, depth: Int = 8, sync: Int = 3) = { val to_sink = Wire(new NastiIO()(from_source.p)) to_sink.aw <> AsyncDecoupledCrossing(from_clock, from_reset, from_source.aw, to_clock, to_reset, depth, sync) @@ -723,7 +723,7 @@ object AsyncNastiCrossing { object AsyncNastiTo { // takes source from your clock domain and puts it into the 'to' clock domain - def apply(to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2): NastiIO = { + def apply(to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 8, sync: Int = 3): NastiIO = { val scope = AsyncScope() AsyncNastiCrossing(scope.clock, scope.reset, source, to_clock, to_reset, depth, sync) } @@ -731,7 +731,7 @@ object AsyncNastiTo { object AsyncNastiFrom { // takes from_source from the 'from' clock domain and puts it into your clock domain - def apply(from_clock: Clock, from_reset: Bool, from_source: NastiIO, depth: Int = 3, sync: Int = 2): NastiIO = { + def apply(from_clock: Clock, from_reset: Bool, from_source: NastiIO, depth: Int = 8, sync: Int = 3): NastiIO = { val scope = AsyncScope() AsyncNastiCrossing(from_clock, from_reset, from_source, scope.clock, scope.reset, depth, sync) } diff --git a/src/main/scala/uncore/devices/Debug.scala b/src/main/scala/uncore/devices/Debug.scala index 310bdc68..6714a741 100644 --- a/src/main/scala/uncore/devices/Debug.scala +++ b/src/main/scala/uncore/devices/Debug.scala @@ -994,7 +994,7 @@ object AsyncDebugBusCrossing { object AsyncDebugBusFrom { // OutsideClockDomain // takes from_source from the 'from' clock domain and puts it into your clock domain - def apply(from_clock: Clock, from_reset: Bool, from_source: DebugBusIO, depth: Int = 0, sync: Int = 2): DebugBusIO = { + def apply(from_clock: Clock, from_reset: Bool, from_source: DebugBusIO, depth: Int = 1, sync: Int = 3): DebugBusIO = { val scope = AsyncScope() AsyncDebugBusCrossing(from_clock, from_reset, from_source, scope.clock, scope.reset, depth, sync) } @@ -1002,7 +1002,7 @@ object AsyncDebugBusFrom { // OutsideClockDomain object AsyncDebugBusTo { // OutsideClockDomain // takes source from your clock domain and puts it into the 'to' clock domain - def apply(to_clock: Clock, to_reset: Bool, source: DebugBusIO, depth: Int = 0, sync: Int = 2): DebugBusIO = { + def apply(to_clock: Clock, to_reset: Bool, source: DebugBusIO, depth: Int = 1, sync: Int = 3): DebugBusIO = { val scope = AsyncScope() AsyncDebugBusCrossing(scope.clock, scope.reset, source, to_clock, to_reset, depth, sync) } From c8e6d478840909915ac69aa38820b132d9dd50c7 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 16:04:46 -0700 Subject: [PATCH 07/10] tilelink2: add a clock crossing adapter --- src/main/scala/uncore/tilelink2/Buffer.scala | 2 +- .../scala/uncore/tilelink2/Crossing.scala | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 src/main/scala/uncore/tilelink2/Crossing.scala diff --git a/src/main/scala/uncore/tilelink2/Buffer.scala b/src/main/scala/uncore/tilelink2/Buffer.scala index adfe4c4b..558a524a 100644 --- a/src/main/scala/uncore/tilelink2/Buffer.scala +++ b/src/main/scala/uncore/tilelink2/Buffer.scala @@ -22,7 +22,7 @@ class TLBuffer(entries: Int = 2, pipe: Boolean = false) extends LazyModule if (edgeOut.manager.anySupportAcquire && edgeOut.client.anySupportProbe) { in .b <> Queue(out.b, entries, pipe) out.c <> Queue(in .c, entries, pipe) - out.e <> Queue(out.e, entries, pipe) + out.e <> Queue(in .e, entries, pipe) } else { in.b.valid := Bool(false) in.c.ready := Bool(true) diff --git a/src/main/scala/uncore/tilelink2/Crossing.scala b/src/main/scala/uncore/tilelink2/Crossing.scala new file mode 100644 index 00000000..1f903c9a --- /dev/null +++ b/src/main/scala/uncore/tilelink2/Crossing.scala @@ -0,0 +1,42 @@ +// See LICENSE for license details. + +package uncore.tilelink2 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import junctions._ + +class TLAsyncCrossing(depth: Int = 8, sync: Int = 3) extends LazyModule +{ + val node = TLIdentityNode() + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + val in_clock = Clock(INPUT) + val in_reset = Bool(INPUT) + val out = node.bundleOut + val out_clock = Clock(INPUT) + val out_reset = Bool(INPUT) + } + + // Transfer all TL2 bundles from/to the same domains + ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => + out.a <> AsyncDecoupledCrossing(io.in_clock, io.in_reset, in.a, io.out_clock, io.out_reset, depth, sync) + in.d <> AsyncDecoupledCrossing(io.out_clock, io.out_reset, out.d, io.in_clock, io.in_reset, depth, sync) + + if (edgeOut.manager.anySupportAcquire && edgeOut.client.anySupportProbe) { + in.b <> AsyncDecoupledCrossing(io.out_clock, io.out_reset, out.b, io.in_clock, io.in_reset, depth, sync) + out.c <> AsyncDecoupledCrossing(io.in_clock, io.in_reset, in.c, io.out_clock, io.out_reset, depth, sync) + out.e <> AsyncDecoupledCrossing(io.in_clock, io.in_reset, in.e, io.out_clock, io.out_reset, depth, sync) + } else { + in.b.valid := Bool(false) + in.c.ready := Bool(true) + in.e.ready := Bool(true) + out.b.ready := Bool(true) + out.c.valid := Bool(false) + out.e.valid := Bool(false) + } + } + } +} From acedd3688a298b6022c597d83773fbe3813a8a52 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 16:35:06 -0700 Subject: [PATCH 08/10] tilelink2: unit test for the clock crossing --- src/main/scala/uncore/tilelink2/Fuzzer.scala | 26 +++++++++++++++++++- vsrc/ClockDivider.v | 19 ++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 vsrc/ClockDivider.v diff --git a/src/main/scala/uncore/tilelink2/Fuzzer.scala b/src/main/scala/uncore/tilelink2/Fuzzer.scala index b2a734f8..b5cdd263 100644 --- a/src/main/scala/uncore/tilelink2/Fuzzer.scala +++ b/src/main/scala/uncore/tilelink2/Fuzzer.scala @@ -206,6 +206,15 @@ class TLFuzzer( } } +class ClockDivider extends BlackBox { + val io = new Bundle { + val clock_in = Clock(INPUT) + val reset_in = Bool(INPUT) + val clock_out = Clock(OUTPUT) + val reset_out = Bool(OUTPUT) + } +} + class TLFuzzRAM extends LazyModule { val model = LazyModule(new TLRAMModel) @@ -213,14 +222,29 @@ class TLFuzzRAM extends LazyModule val gpio = LazyModule(new RRTest1(0x400)) val xbar = LazyModule(new TLXbar) val fuzz = LazyModule(new TLFuzzer(5000)) + val cross = LazyModule(new TLAsyncCrossing) model.node := fuzz.node xbar.node := TLWidthWidget(TLHintHandler(model.node), 16) - ram.node := TLFragmenter(TLBuffer(xbar.node), 4, 256) + cross.node := TLFragmenter(TLBuffer(xbar.node), 4, 256) + ram.node := cross.node gpio.node := TLFragmenter(TLBuffer(xbar.node), 4, 32) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { io.finished := fuzz.module.io.finished + + // Shove the RAM into another clock domain + val clocks = Module(new ClockDivider) + ram.module.clock := clocks.io.clock_out + ram.module.reset := clocks.io.reset_out + clocks.io.clock_in := clock + clocks.io.reset_in := reset + + // ... and safely cross TL2 into it + cross.module.io.in_clock := clock + cross.module.io.in_reset := reset + cross.module.io.out_clock := clocks.io.clock_out + cross.module.io.out_reset := clocks.io.reset_out } } diff --git a/vsrc/ClockDivider.v b/vsrc/ClockDivider.v new file mode 100644 index 00000000..83bb561b --- /dev/null +++ b/vsrc/ClockDivider.v @@ -0,0 +1,19 @@ +// You can't divide clocks in Chisel +module ClockDivider( + input clock_in, + input reset_in, + output clock_out, + output reset_out +); + + reg [2:0] shift = 3'b001; + + always @(posedge clock_in) + begin + shift <= {shift[0], shift[2:1]}; + end + + assign reset_out = reset_in; + assign clock_out = shift[0]; + +endmodule From cc88bf1b089156697b924757e8c4f4e8ab713e46 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 17:41:35 -0700 Subject: [PATCH 09/10] junctions: give unit tests more time --- src/main/scala/junctions/unittests/UnitTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/junctions/unittests/UnitTest.scala b/src/main/scala/junctions/unittests/UnitTest.scala index 77dd146b..b64e0f5f 100644 --- a/src/main/scala/junctions/unittests/UnitTest.scala +++ b/src/main/scala/junctions/unittests/UnitTest.scala @@ -39,7 +39,7 @@ class UnitTestSuite(implicit p: Parameters) extends Module { state := Mux(test_idx === UInt(tests.size - 1), s_done, s_start) } - val timer = Module(new Timer(100000, tests.size)) + val timer = Module(new Timer(500000, tests.size)) timer.io.start.valid := Bool(false) timer.io.stop.valid := Bool(false) From d23ab7370dd172db9925b466561f14a4d82eae5f Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 18:33:29 -0700 Subject: [PATCH 10/10] tilelink2: Unit Test for the RegisterCrossing --- .../uncore/tilelink2/RegisterRouterTest.scala | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala b/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala index 65dde1d3..be81b4c8 100644 --- a/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala +++ b/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala @@ -212,9 +212,37 @@ trait RRTest1Bundle { } -trait RRTest1Module extends HasRegMap +trait RRTest1Module extends Module with HasRegMap { - regmap(RRTest1Map.map:_*) + val clocks = Module(new ClockDivider) + clocks.io.clock_in := clock + clocks.io.reset_in := reset + + def x(bits: Int) = { + val field = UInt(width = bits) + + val readCross = Module(new RegisterReadCrossing(field)) + readCross.io.master_clock := clock + readCross.io.master_reset := reset + readCross.io.master_allow := Bool(true) + readCross.io.slave_clock := clocks.io.clock_out + readCross.io.slave_reset := clocks.io.reset_out + readCross.io.slave_allow := Bool(true) + + val writeCross = Module(new RegisterWriteCrossing(field)) + writeCross.io.master_clock := clock + writeCross.io.master_reset := reset + writeCross.io.master_allow := Bool(true) + writeCross.io.slave_clock := clocks.io.clock_out + writeCross.io.slave_reset := clocks.io.reset_out + writeCross.io.slave_allow := Bool(true) + + readCross.io.slave_register := writeCross.io.slave_register + RegField(bits, readCross.io.master_port, writeCross.io.master_port) + } + + val map = RRTest1Map.map.drop(1) ++ Seq(0 -> Seq(x(8), x(8), x(8), x(8))) + regmap(map:_*) } class RRTest1(address: BigInt) extends TLRegisterRouter(address, 0, 32, Some(6), 4)(