From e51609aec01133e74025324bb16743c32caed615 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 03:35:46 +0100 Subject: [PATCH 1/7] build: support waveform debug using opensource tools VCS is not free. Neither is the vcd format. Fortunately, verilator and gtkwave ARE free ... and faster too. This patch adds targets: run-regression-tests-fst run-asm-tests-fst ... which create opensource-compatible fst waveforms for gtkwave. --- emulator/Makefile | 5 +++++ src/main/scala/rocketchip/RocketTestSuite.scala | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/emulator/Makefile b/emulator/Makefile index 637a5c7a..1e362829 100644 --- a/emulator/Makefile +++ b/emulator/Makefile @@ -47,6 +47,11 @@ $(output_dir)/%.vpd: $(output_dir)/% $(emu_debug) vcd2vpd $@.vcd $@ > /dev/null & ./$(emu_debug) +max-cycles=$(timeout_cycles) +verbose -v$@.vcd $< $(disasm) $(patsubst %.vpd,%.out,$@) && [ $$PIPESTATUS -eq 0 ] +$(output_dir)/%.fst: $(output_dir)/% $(emu_debug) + rm -rf $@.vcd && mkfifo $@.vcd + vcd2fst -Z $@.vcd $@ & + ./$(emu_debug) +max-cycles=$(timeout_cycles) +verbose -v$@.vcd $< $(disasm) $(patsubst %.fst,%.out,$@) && [ $$PIPESTATUS -eq 0 ] + run: run-asm-tests run-bmark-tests run-debug: run-asm-tests-debug run-bmark-tests-debug run-fast: run-asm-tests-fast run-bmark-tests-fast diff --git a/src/main/scala/rocketchip/RocketTestSuite.scala b/src/main/scala/rocketchip/RocketTestSuite.scala index d8c25172..6b1c52fb 100644 --- a/src/main/scala/rocketchip/RocketTestSuite.scala +++ b/src/main/scala/rocketchip/RocketTestSuite.scala @@ -26,6 +26,9 @@ run-$makeTargetName: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $$($makeTa run-$makeTargetName-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $$($makeTargetName))) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' + 
+run-$makeTargetName-fst: $$(addprefix $$(output_dir)/, $$(addsuffix .fst, $$($makeTargetName))) +\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.fst,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' """ } @@ -78,6 +81,8 @@ run-$kind-$env-tests: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $suites)) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' run-$kind-$env-tests-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $suites)) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' +run-$kind-$env-tests-fst: $$(addprefix $$(output_dir)/, $$(addsuffix .fst, $suites)) +\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.fst,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' run-$kind-$env-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $suites)) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' """} } ).mkString("\n") + s""" @@ -85,6 +90,8 @@ run-$kind-tests: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $targets)) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' run-$kind-tests-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $targets)) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION 
(FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' +run-$kind-tests-fst: $$(addprefix $$(output_dir)/, $$(addsuffix .fst, $targets)) +\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.fst,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' run-$kind-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $targets)) \t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}' """ From bb334a2cf584b1a5a401fdeaa80307b65cdcadd1 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 03:55:46 +0100 Subject: [PATCH 2/7] util: add fast2slow direction option to rational crossings If you manually specify which side of the crossing is slow, you can move the registers fully to that clock domain. --- src/main/scala/util/RationalCrossing.scala | 70 ++++++++++++++++++---- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/src/main/scala/util/RationalCrossing.scala b/src/main/scala/util/RationalCrossing.scala index 8a0f5448..26a7673d 100644 --- a/src/main/scala/util/RationalCrossing.scala +++ b/src/main/scala/util/RationalCrossing.scala @@ -7,6 +7,32 @@ package util import Chisel._ +// A rational crossing must put registers on the slow side. +// This trait covers the options of how/where to put the registers. +// BEWARE: the source+sink must agree on the direction! +sealed trait RationalDirection { + def flip: RationalDirection +} + +// If it's unclear which side will be slow (or it is variable), +// place registers on both sides of the crossing, by splitting +// a Queue into flow and pipe parts on either side. 
This is safe +// for all possible clock ratios, but has the downside that the +// path from the slow domain must close timing in the fast domain. +case object Symmetric extends RationalDirection { + def flip = Symmetric +} + +// If the source is fast, place the registers at the sink. +case object FastToSlow extends RationalDirection { + def flip = SlowToFast +} + +// If the source is slow, place the registers at the source. +case object SlowToFast extends RationalDirection { + def flip = FastToSlow +} + final class RationalIO[T <: Data](gen: T) extends Bundle { val bits = gen.chiselCloneType @@ -23,15 +49,19 @@ object RationalIO def apply[T <: Data](gen: T) = new RationalIO(gen) } -class RationalCrossingSource[T <: Data](gen: T) extends Module +class RationalCrossingSource[T <: Data](gen: T, direction: RationalDirection = Symmetric) extends Module { val io = new Bundle { val enq = DecoupledIO(gen).flip val deq = RationalIO(gen) } - val enq = Queue(io.enq, 1, flow=true) val deq = io.deq + val enq = direction match { + case Symmetric => Queue(io.enq, 1, flow=true) + case FastToSlow => io.enq + case SlowToFast => Queue(io.enq, 2) + } val count = RegInit(UInt(0, width = 2)) val equal = count === deq.sink @@ -42,9 +72,16 @@ class RationalCrossingSource[T <: Data](gen: T) extends Module enq.ready := Mux(equal, deq.ready, count(1) =/= deq.sink(0)) when (enq.fire()) { count := Cat(count(0), !count(1)) } + + // Ensure the clocking is setup correctly + direction match { + case Symmetric => () // always safe + case FastToSlow => assert (equal || count(1) === deq.sink(0)) + case SlowToFast => assert (equal || count(1) =/= deq.sink(0)) + } } -class RationalCrossingSink[T <: Data](gen: T) extends Module +class RationalCrossingSink[T <: Data](gen: T, direction: RationalDirection = Symmetric) extends Module { val io = new Bundle { val enq = RationalIO(gen).flip @@ -53,7 +90,11 @@ class RationalCrossingSink[T <: Data](gen: T) extends Module val enq = io.enq val deq = Wire(io.deq) - 
io.deq <> Queue(deq, 1, pipe=true) + direction match { + case Symmetric => io.deq <> Queue(deq, 1, pipe=true) + case FastToSlow => io.deq <> Queue(deq, 2) + case SlowToFast => io.deq <> deq + } val count = RegInit(UInt(0, width = 2)) val equal = count === enq.source @@ -64,14 +105,21 @@ class RationalCrossingSink[T <: Data](gen: T) extends Module deq.valid := Mux(equal, enq.valid, count(1) =/= enq.source(0)) when (deq.fire()) { count := Cat(count(0), !count(1)) } + + // Ensure the clocking is setup correctly + direction match { + case Symmetric => () // always safe + case FastToSlow => assert (equal || count(1) =/= enq.source(0)) + case SlowToFast => assert (equal || count(1) === enq.source(0)) + } } -class RationalCrossing[T <: Data](gen: T) extends Module +class RationalCrossing[T <: Data](gen: T, direction: RationalDirection = Symmetric) extends Module { val io = new CrossingIO(gen) - val source = Module(new RationalCrossingSource(gen)) - val sink = Module(new RationalCrossingSink(gen)) + val source = Module(new RationalCrossingSource(gen, direction)) + val sink = Module(new RationalCrossingSink(gen, direction)) source.clock := io.enq_clock source.reset := io.enq_reset @@ -84,8 +132,8 @@ class RationalCrossing[T <: Data](gen: T) extends Module object ToRational { - def apply[T <: Data](x: DecoupledIO[T]): RationalIO[T] = { - val source = Module(new RationalCrossingSource(x.bits)) + def apply[T <: Data](x: DecoupledIO[T], direction: RationalDirection = Symmetric): RationalIO[T] = { + val source = Module(new RationalCrossingSource(x.bits, direction)) source.io.enq <> x source.io.deq } @@ -93,8 +141,8 @@ object ToRational object FromRational { - def apply[T <: Data](x: RationalIO[T]): DecoupledIO[T] = { - val sink = Module(new RationalCrossingSink(x.bits)) + def apply[T <: Data](x: RationalIO[T], direction: RationalDirection = Symmetric): DecoupledIO[T] = { + val sink = Module(new RationalCrossingSink(x.bits, direction)) sink.io.enq <> x sink.io.deq } From 
924afebbd997e8351d5f33fb74dfcd82eb1c2084 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 04:19:00 +0100 Subject: [PATCH 3/7] tilelink2: make TLRational have configurable direction --- src/main/scala/uncore/tilelink2/Nodes.scala | 29 +++++++-------- .../scala/uncore/tilelink2/Parameters.scala | 9 +++++ .../uncore/tilelink2/RationalCrossing.scala | 36 ++++++++++--------- src/main/scala/uncore/tilelink2/package.scala | 2 +- 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/Nodes.scala b/src/main/scala/uncore/tilelink2/Nodes.scala index 82044873..22701f12 100644 --- a/src/main/scala/uncore/tilelink2/Nodes.scala +++ b/src/main/scala/uncore/tilelink2/Nodes.scala @@ -7,6 +7,7 @@ import chisel3.internal.sourceinfo.SourceInfo import config._ import diplomacy._ import scala.collection.mutable.ListBuffer +import util.RationalDirection case object TLMonitorBuilder extends Field[TLMonitorArgs => Option[TLMonitorBase]] case object TLFuzzReadyValid extends Field[Boolean] @@ -182,20 +183,20 @@ case class TLAsyncSinkNode(depth: Int, sync: Int) dFn = { p => p.base.copy(minLatency = sync+1) }, uFn = { p => TLAsyncManagerPortParameters(depth, p) }) -object TLRationalImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TLEdgeParameters, TLEdgeParameters, TLRationalBundle] +object TLRationalImp extends NodeImp[TLRationalClientPortParameters, TLRationalManagerPortParameters, TLRationalEdgeParameters, TLRationalEdgeParameters, TLRationalBundle] { - def edgeO(pd: TLClientPortParameters, pu: TLManagerPortParameters): TLEdgeParameters = TLEdgeParameters(pd, pu) - def edgeI(pd: TLClientPortParameters, pu: TLManagerPortParameters): TLEdgeParameters = TLEdgeParameters(pd, pu) + def edgeO(pd: TLRationalClientPortParameters, pu: TLRationalManagerPortParameters): TLRationalEdgeParameters = TLRationalEdgeParameters(pd, pu) + def edgeI(pd: TLRationalClientPortParameters, pu: TLRationalManagerPortParameters): 
TLRationalEdgeParameters = TLRationalEdgeParameters(pd, pu) - def bundleO(eo: Seq[TLEdgeParameters]): Vec[TLRationalBundle] = Vec(eo.size, new TLRationalBundle(TLBundleParameters.union(eo.map(_.bundle)))) - def bundleI(ei: Seq[TLEdgeParameters]): Vec[TLRationalBundle] = Vec(ei.size, new TLRationalBundle(TLBundleParameters.union(ei.map(_.bundle)))) + def bundleO(eo: Seq[TLRationalEdgeParameters]): Vec[TLRationalBundle] = Vec(eo.size, new TLRationalBundle(TLBundleParameters.union(eo.map(_.bundle)))) + def bundleI(ei: Seq[TLRationalEdgeParameters]): Vec[TLRationalBundle] = Vec(ei.size, new TLRationalBundle(TLBundleParameters.union(ei.map(_.bundle)))) def colour = "#00ff00" // green - override def mixO(pd: TLClientPortParameters, node: OutwardNode[TLClientPortParameters, TLManagerPortParameters, TLRationalBundle]): TLClientPortParameters = - pd.copy(clients = pd.clients.map { c => c.copy (nodePath = node +: c.nodePath) }) - override def mixI(pu: TLManagerPortParameters, node: InwardNode[TLClientPortParameters, TLManagerPortParameters, TLRationalBundle]): TLManagerPortParameters = - pu.copy(managers = pu.managers.map { m => m.copy (nodePath = node +: m.nodePath) }) + override def mixO(pd: TLRationalClientPortParameters, node: OutwardNode[TLRationalClientPortParameters, TLRationalManagerPortParameters, TLRationalBundle]): TLRationalClientPortParameters = + pd.copy(base = pd.base.copy(clients = pd.base.clients.map { c => c.copy (nodePath = node +: c.nodePath) })) + override def mixI(pu: TLRationalManagerPortParameters, node: InwardNode[TLRationalClientPortParameters, TLRationalManagerPortParameters, TLRationalBundle]): TLRationalManagerPortParameters = + pu.copy(base = pu.base.copy(managers = pu.base.managers.map { m => m.copy (nodePath = node +: m.nodePath) })) } case class TLRationalIdentityNode() extends IdentityNode(TLRationalImp) @@ -204,10 +205,10 @@ case class TLRationalInputNode() extends InputNode(TLRationalImp) case class TLRationalSourceNode() extends 
MixedAdapterNode(TLImp, TLRationalImp)( - dFn = { p => p }, - uFn = { p => p.copy(minLatency = 1) }) // discard cycles from other clock domain + dFn = { p => TLRationalClientPortParameters(p) }, + uFn = { p => p.base.copy(minLatency = 1) }) // discard cycles from other clock domain -case class TLRationalSinkNode() +case class TLRationalSinkNode(direction: RationalDirection) extends MixedAdapterNode(TLRationalImp, TLImp)( - dFn = { p => p.copy(minLatency = 1) }, - uFn = { p => p }) + dFn = { p => p.base.copy(minLatency = 1) }, + uFn = { p => TLRationalManagerPortParameters(direction, p) }) diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala index 21b7a241..3462e559 100644 --- a/src/main/scala/uncore/tilelink2/Parameters.scala +++ b/src/main/scala/uncore/tilelink2/Parameters.scala @@ -5,6 +5,7 @@ package uncore.tilelink2 import Chisel._ import diplomacy._ import scala.math.max +import util.RationalDirection case class TLManagerParameters( address: Seq[AddressSet], @@ -326,6 +327,14 @@ case class TLAsyncEdgeParameters(client: TLAsyncClientPortParameters, manager: T val bundle = TLAsyncBundleParameters(manager.depth, TLBundleParameters(client.base, manager.base)) } +case class TLRationalManagerPortParameters(direction: RationalDirection, base: TLManagerPortParameters) +case class TLRationalClientPortParameters(base: TLClientPortParameters) + +case class TLRationalEdgeParameters(client: TLRationalClientPortParameters, manager: TLRationalManagerPortParameters) +{ + val bundle = TLBundleParameters(client.base, manager.base) +} + object ManagerUnification { def apply(managers: Seq[TLManagerParameters]) = { diff --git a/src/main/scala/uncore/tilelink2/RationalCrossing.scala b/src/main/scala/uncore/tilelink2/RationalCrossing.scala index 6d3b70fd..8a3b0b64 100644 --- a/src/main/scala/uncore/tilelink2/RationalCrossing.scala +++ b/src/main/scala/uncore/tilelink2/RationalCrossing.scala @@ -27,14 +27,15 @@ class 
TLRationalCrossingSource(implicit p: Parameters) extends LazyModule ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => val bce = edgeIn.manager.anySupportAcquireB && edgeIn.client.anySupportProbe + val direction = edgeOut.manager.direction - out.a <> ToRational(in.a) - in.d <> FromRational(out.d) + out.a <> ToRational(in.a, direction) + in.d <> FromRational(out.d, direction.flip) if (bce) { - in.b <> FromRational(out.b) - out.c <> ToRational(in.c) - out.e <> ToRational(in.e) + in.b <> FromRational(out.b, direction.flip) + out.c <> ToRational(in.c, direction) + out.e <> ToRational(in.e, direction) } else { in.b.valid := Bool(false) in.c.ready := Bool(true) @@ -50,9 +51,9 @@ class TLRationalCrossingSource(implicit p: Parameters) extends LazyModule } } -class TLRationalCrossingSink(implicit p: Parameters) extends LazyModule +class TLRationalCrossingSink(direction: RationalDirection = Symmetric)(implicit p: Parameters) extends LazyModule { - val node = TLRationalSinkNode() + val node = TLRationalSinkNode(direction) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -62,14 +63,15 @@ class TLRationalCrossingSink(implicit p: Parameters) extends LazyModule ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => val bce = edgeOut.manager.anySupportAcquireB && edgeOut.client.anySupportProbe + val direction = edgeIn.manager.direction - out.a <> FromRational(in.a) - in.d <> ToRational(out.d) + out.a <> FromRational(in.a, direction) + in.d <> ToRational(out.d, direction.flip) if (bce) { - in.b <> ToRational(out.b) - out.c <> FromRational(in.c) - out.e <> FromRational(in.e) + in.b <> ToRational(out.b, direction.flip) + out.c <> FromRational(in.c, direction) + out.e <> FromRational(in.e, direction) } else { out.b.ready := Bool(true) out.c.valid := Bool(false) @@ -98,21 +100,21 @@ object TLRationalCrossingSource object TLRationalCrossingSink { // applied to 
the TL source node; y.node := TLRationalCrossingSink()(x.node) - def apply()(x: TLRationalOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = { - val sink = LazyModule(new TLRationalCrossingSink) + def apply(direction: RationalDirection = Symmetric)(x: TLRationalOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = { + val sink = LazyModule(new TLRationalCrossingSink(direction)) sink.node := x sink.node } } -class TLRationalCrossing(implicit p: Parameters) extends LazyModule +class TLRationalCrossing(direction: RationalDirection = Symmetric)(implicit p: Parameters) extends LazyModule { val nodeIn = TLInputNode() val nodeOut = TLOutputNode() val node = NodeHandle(nodeIn, nodeOut) val source = LazyModule(new TLRationalCrossingSource) - val sink = LazyModule(new TLRationalCrossingSink) + val sink = LazyModule(new TLRationalCrossingSink(direction)) val _ = (sink.node := source.node) // no monitor val in = (source.node := nodeIn) @@ -150,7 +152,7 @@ import unittest._ class TLRAMRationalCrossing(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) val model = LazyModule(new TLRAMModel) - val cross = LazyModule(new TLRationalCrossing) + val cross = LazyModule(new TLRationalCrossing(FastToSlow)) val delay = LazyModule(new TLDelayer(0.25)) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) diff --git a/src/main/scala/uncore/tilelink2/package.scala b/src/main/scala/uncore/tilelink2/package.scala index f382ebdf..026b592d 100644 --- a/src/main/scala/uncore/tilelink2/package.scala +++ b/src/main/scala/uncore/tilelink2/package.scala @@ -10,7 +10,7 @@ package object tilelink2 type TLInwardNode = InwardNodeHandle[TLClientPortParameters, TLManagerPortParameters, TLBundle] type TLOutwardNode = OutwardNodeHandle[TLClientPortParameters, TLManagerPortParameters, TLBundle] type TLAsyncOutwardNode = OutwardNodeHandle[TLAsyncClientPortParameters, TLAsyncManagerPortParameters, TLAsyncBundle] - type 
TLRationalOutwardNode = OutwardNodeHandle[TLClientPortParameters, TLManagerPortParameters, TLRationalBundle] + type TLRationalOutwardNode = OutwardNodeHandle[TLRationalClientPortParameters, TLRationalManagerPortParameters, TLRationalBundle] type IntOutwardNode = OutwardNodeHandle[IntSourcePortParameters, IntSinkPortParameters, Vec[Bool]] def OH1ToOH(x: UInt) = (x << 1 | UInt(1)) & ~Cat(UInt(0, width=1), x) From 91d1880dbf63696baa29d80e1f1576d8ba19b98b Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 11:49:35 +0100 Subject: [PATCH 4/7] ClockDivider2: fix launch alignment of clocks (vcs) Doing this in Chisel leads to non-determinism due to shitty Verilog ordering semantics. Using an '=' ensures that all of the clock posedges fire before concurrent register updates. See "Gotcha 29: Sequential logic that requires blocking assignments" in "Verilog and SystemVerilog Gotchas" by Stuart Sutherland, Don Mills. --- src/main/scala/util/ClockDivider.scala | 15 ++++++--------- vsim/Makefrag | 1 + vsrc/ClockDivider2.v | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 vsrc/ClockDivider2.v diff --git a/src/main/scala/util/ClockDivider.scala b/src/main/scala/util/ClockDivider.scala index 500650a8..094f045f 100644 --- a/src/main/scala/util/ClockDivider.scala +++ b/src/main/scala/util/ClockDivider.scala @@ -5,15 +5,11 @@ package util import Chisel._ /** Divide the clock by 2 */ -class ClockDivider2 extends Module { +class ClockDivider2 extends BlackBox { val io = new Bundle { - val clock_out = Clock(OUTPUT) + val clk_out = Clock(OUTPUT) + val clk_in = Clock(INPUT) } - - val clock_reg = Reg(Bool()) - clock_reg := !clock_reg - - io.clock_out := clock_reg.asClock } /** Divide the clock by power of 2 times. 
@@ -30,9 +26,10 @@ class Pow2ClockDivider(pow2: Int) extends Module { val dividers = Seq.fill(pow2) { Module(new ClockDivider2) } dividers.init.zip(dividers.tail).map { case (last, next) => - next.clock := last.io.clock_out + next.io.clk_in := last.io.clk_out } - io.clock_out := dividers.last.io.clock_out + dividers.head.io.clk_in := clock + io.clock_out := dividers.last.io.clk_out } } diff --git a/vsim/Makefrag b/vsim/Makefrag index 66f681c8..93c38d8f 100644 --- a/vsim/Makefrag +++ b/vsim/Makefrag @@ -6,6 +6,7 @@ bb_vsrcs = $(base_dir)/vsrc/DebugTransportModuleJtag.v \ $(base_dir)/vsrc/jtag_vpi.v \ + $(base_dir)/vsrc/ClockDivider2.v \ $(base_dir)/vsrc/AsyncResetReg.v \ sim_vsrcs = \ diff --git a/vsrc/ClockDivider2.v b/vsrc/ClockDivider2.v new file mode 100644 index 00000000..9da5e93c --- /dev/null +++ b/vsrc/ClockDivider2.v @@ -0,0 +1,21 @@ +// See LICENSE.SiFive for license details. + +/** This black-boxes a Clock Divider. + * + * Because Chisel does not support + * blocking assignments, it is impossible + * to create a deterministic divided clock. + * + * @param clk_out Divided Clock + * @param clk_in Clock Input + * + */ + +module ClockDivider2 (output reg clk_out, input clk_in); + + initial clk_out = 1'b0; + always @(posedge clk_in) begin + clk_out = ~clk_out; // Must use =, NOT <= + end + +endmodule // ClockDivider2 From 5045696f92a74a76400a1cc321723ce1a7d5aef1 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 14:16:45 +0100 Subject: [PATCH 5/7] TLRational: test all corners In order to ensure that verilator is happy, we launch both clocks from a clock divider. Sadly, it does not follow the spec wrt. derived clocks. See the verilator manual section on "Generated Clocks". 
--- .../uncore/tilelink2/RationalCrossing.scala | 80 ++++++++++++++----- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/RationalCrossing.scala b/src/main/scala/uncore/tilelink2/RationalCrossing.scala index 8a3b0b64..a736269c 100644 --- a/src/main/scala/uncore/tilelink2/RationalCrossing.scala +++ b/src/main/scala/uncore/tilelink2/RationalCrossing.scala @@ -149,35 +149,73 @@ class TLRationalCrossing(direction: RationalDirection = Symmetric)(implicit p: P /** Synthesizeable unit tests */ import unittest._ -class TLRAMRationalCrossing(implicit p: Parameters) extends LazyModule { +class TLRAMRationalCrossingSource(implicit p: Parameters) extends LazyModule { + val node = TLRationalOutputNode() val fuzz = LazyModule(new TLFuzzer(5000)) val model = LazyModule(new TLRAMModel) - val cross = LazyModule(new TLRationalCrossing(FastToSlow)) - val delay = LazyModule(new TLDelayer(0.25)) - val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) model.node := fuzz.node - cross.node := TLDelayer(0.25)(TLFragmenter(4, 256)(model.node)) - val monitor1 = (delay.node := cross.node) - val monitor2 = (ram.node := delay.node) - val monitors = monitor1.toList ++ monitor2.toList + node := TLRationalCrossingSource()(TLDelayer(0.25)(model.node)) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val finished = Bool(OUTPUT) + val out = node.bundleOut + } + io.finished := fuzz.module.io.finished + } +} + +class TLRAMRationalCrossingSink(direction: RationalDirection)(implicit p: Parameters) extends LazyModule { + val node = TLRationalInputNode() + val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) + + ram.node := TLFragmenter(4, 256)(TLDelayer(0.25)(TLRationalCrossingSink(direction)(node))) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + } + } +} + +class TLRAMRationalCrossing(implicit p: Parameters) extends LazyModule { + val sym_fast_source = LazyModule(new 
TLRAMRationalCrossingSource) + val sym_slow_sink = LazyModule(new TLRAMRationalCrossingSink(Symmetric)) + sym_slow_sink.node := sym_fast_source.node + + val sym_slow_source = LazyModule(new TLRAMRationalCrossingSource) + val sym_fast_sink = LazyModule(new TLRAMRationalCrossingSink(Symmetric)) + sym_fast_sink.node := sym_slow_source.node + + val fix_fast_source = LazyModule(new TLRAMRationalCrossingSource) + val fix_slow_sink = LazyModule(new TLRAMRationalCrossingSink(FastToSlow)) + fix_slow_sink.node := fix_fast_source.node + + val fix_slow_source = LazyModule(new TLRAMRationalCrossingSource) + val fix_fast_sink = LazyModule(new TLRAMRationalCrossingSink(SlowToFast)) + fix_fast_sink.node := fix_slow_source.node lazy val module = new LazyModuleImp(this) with HasUnitTestIO { - io.finished := fuzz.module.io.finished + io.finished := + sym_fast_source.module.io.finished && + sym_slow_source.module.io.finished && + fix_fast_source.module.io.finished && + fix_slow_source.module.io.finished - // Shove the RAM into another clock domain - val clocks = Module(new util.Pow2ClockDivider(2)) - ram.module.clock := clocks.io.clock_out - delay.module.clock := clocks.io.clock_out + // Generate faster clock (still divided so verilator approves) + val fast = Module(new util.Pow2ClockDivider(1)) + sym_fast_source.module.clock := fast.io.clock_out + sym_fast_sink .module.clock := fast.io.clock_out + fix_fast_source.module.clock := fast.io.clock_out + fix_fast_sink .module.clock := fast.io.clock_out - // ... 
and safely cross TL2 into it - cross.module.io.in_clock := clock - cross.module.io.in_reset := reset - cross.module.io.out_clock := clocks.io.clock_out - cross.module.io.out_reset := reset - - // Push the Monitors into the right clock domain - monitors.foreach { m => m.module.clock := clocks.io.clock_out } + // Generate slower clock + val slow = Module(new util.Pow2ClockDivider(2)) + sym_slow_source.module.clock := slow.io.clock_out + sym_slow_sink .module.clock := slow.io.clock_out + fix_slow_source.module.clock := slow.io.clock_out + fix_slow_sink .module.clock := slow.io.clock_out } } From 3931b0faff5e77b02a1e27215519d789c2ca5209 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 15:15:41 +0100 Subject: [PATCH 6/7] coreplex: assume L1 runs no slower than L2 --- src/main/scala/coreplex/RocketTiles.scala | 2 +- src/main/scala/rocket/Tile.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/coreplex/RocketTiles.scala b/src/main/scala/coreplex/RocketTiles.scala index 60ceb4f3..31f39ba6 100644 --- a/src/main/scala/coreplex/RocketTiles.scala +++ b/src/main/scala/coreplex/RocketTiles.scala @@ -73,7 +73,7 @@ trait HasRocketTiles extends CoreplexRISCVPlatform { } case Rational => { val wrapper = LazyModule(new RationalRocketTile(c)(pWithExtra)) - val sink = LazyModule(new TLRationalCrossingSink) + val sink = LazyModule(new TLRationalCrossingSink(util.FastToSlow)) val source = LazyModule(new TLRationalCrossingSource) sink.node :=* wrapper.masterNode l1tol2.node :=* sink.node diff --git a/src/main/scala/rocket/Tile.scala b/src/main/scala/rocket/Tile.scala index 026564e1..23f726ff 100644 --- a/src/main/scala/rocket/Tile.scala +++ b/src/main/scala/rocket/Tile.scala @@ -103,7 +103,7 @@ class RationalRocketTile(rtp: RocketTileParams)(implicit p: Parameters) extends masterNode :=* source.node val slaveNode = TLRationalInputNode() - val sink = LazyModule(new TLRationalCrossingSink) + val sink = LazyModule(new 
TLRationalCrossingSink(util.SlowToFast)) rocket.slaveNode :*= sink.node sink.node :*= slaveNode From 9153a9a7335f319a8d45f912898c4d8695168e08 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Feb 2017 19:34:44 +0100 Subject: [PATCH 7/7] ClockDivider: add docs to appease the reviewer ... even though this means a delay of 1:30 hours :( --- src/main/scala/util/ClockDivider.scala | 10 +++++++++- vsrc/ClockDivider2.v | 5 ++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/main/scala/util/ClockDivider.scala b/src/main/scala/util/ClockDivider.scala index 094f045f..bbe07279 100644 --- a/src/main/scala/util/ClockDivider.scala +++ b/src/main/scala/util/ClockDivider.scala @@ -4,7 +4,15 @@ package util import Chisel._ -/** Divide the clock by 2 */ +/** This black-boxes a Clock Divider by 2. + * The output clock is phase-aligned to the input clock. + * If you use this in synthesis, make sure your sdc + * declares that you want it to do the same. + * + * Because Chisel does not support + * blocking assignments, it is impossible + * to create a deterministic divided clock. + */ class ClockDivider2 extends BlackBox { val io = new Bundle { val clk_out = Clock(OUTPUT) diff --git a/vsrc/ClockDivider2.v b/vsrc/ClockDivider2.v index 9da5e93c..b1190d33 100644 --- a/vsrc/ClockDivider2.v +++ b/vsrc/ClockDivider2.v @@ -1,6 +1,9 @@ // See LICENSE.SiFive for license details. -/** This black-boxes a Clock Divider. +/** This black-boxes a Clock Divider by 2. + * The output clock is phase-aligned to the input clock. + * If you use this in synthesis, make sure your sdc + * declares that you want it to do the same. * * Because Chisel does not support * blocking assignments, it is impossible