Provide Spanning Forest JSON debug info by default (#4136)

fixes #4125
com-lihaoyi · Dec 15, 2024 · 56befd8 · 56befd8
1 parent a117b62
commit 56befd8
Show file tree

Hide file tree

Showing 54 changed files with 298 additions and 141 deletions.
diff --git a/build.mill b/build.mill
@@ -256,7 +256,7 @@ object Deps {
   }
 }
 
-def millVersion: T[String] = Task {
+def millVersion: T[String] = Task.Input {
   if (Task.env.contains("MILL_STABLE_VERSION")) VcsVersion.calcVcsState(Task.log).format()
   else "SNAPSHOT"
 }

diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
@@ -95,8 +95,11 @@
 // These are things that most Mill developers would not encounter day to day,
 // but people developing Mill plugins or working on particularly large or
 // sophisticated Mill builds will need to understand.
+* xref:large/large.adoc[]
+** xref:large/selective-execution.adoc[]
+** xref:large/multi-file-builds.adoc[]
+
 * Mill In Depth
-** xref:depth/large-builds.adoc[]
 ** xref:depth/sandboxing.adoc[]
 ** xref:depth/evaluation-model.adoc[]
 ** xref:depth/design-principles.adoc[]

diff --git a/docs/modules/ROOT/pages/comparisons/unique.adoc b/docs/modules/ROOT/pages/comparisons/unique.adoc
@@ -85,8 +85,8 @@ xref:android/java.adoc[Android], and has demonstrated the ability to branch out
 more distant toolchains like xref:extending/example-typescript-support.adoc[Typescript]
 and xref:extending/example-python-support.adoc[Python].
 
-Mill also works well with xref:depth/large-builds.adoc[large builds]: its build logic can be
-split into multiple folders, is incrementally compiled,
+Mill also works well with xref:large/large.adoc[large builds]: its build logic can be
+xref:large/multi-file-builds.adoc[split into multiple folders], is incrementally compiled,
 lazily initialized, and automatically cached and parallelized. That means that even large
 codebases can remain fast and responsive: Mill's own build easily manages over 400 modules,
 and the tool can likely handle thousands of modules without issue.

diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc
@@ -13,7 +13,7 @@ or xref:comparisons/gradle.adoc[2-4x faster than Gradle]
 helps keep builds clean and understandable
 
 * Mill is an easier alternative to https://bazel.build/[Bazel]
-for xref:depth/large-builds.adoc[large multi-language monorepos] with hundreds of modules
+for xref:large/large.adoc[large multi-language monorepos] with hundreds of modules
 
 To get started using Mill, see the language-specific introductory documentation linked below:
 
@@ -37,7 +37,7 @@ Java platform's performance and usability:
 xref:depth/evaluation-model.adoc#_caching_at_each_layer_of_the_evaluation_model[caches]
 and xref:cli/flags.adoc#_jobs_j[parallelizes] build tasks to keep local development fast,
 and avoids the long configuration times seen in other tools like Gradle or SBT.
-xref:depth/large-builds.adoc#_selective_execution[Selective execution] keeps
+xref:large/selective-execution.adoc[Selective execution] keeps
 CI validation times short by only running the tests necessary to validate a code change.
 
 * *Maintainability*: Mill's config and xref:javalib/intro.adoc#_custom_build_logic[custom logic]

diff --git a/...odules/ROOT/pages/depth/large-builds.adoc → docs/modules/ROOT/pages/large/large.adoc b/...odules/ROOT/pages/depth/large-builds.adoc → docs/modules/ROOT/pages/large/large.adoc
@@ -1,7 +1,4 @@
 = Large Builds and Monorepos
-:page-aliases: Structuring_Large_Builds.adoc
-
-include::partial$gtag-config.adoc[]
 
 This section walks through Mill features and techniques used for managing large builds.
 While Mill works great for small single-module projects, it is also able to work
@@ -10,25 +7,13 @@ https://github.com/com-lihaoyi/mill[com-lihaoyi/mill] project has ~400 modules,
 other proprietary projects may have many more.
 
 Mill modules are cheap. Having more modules does not significantly impact performance
-or resource usage, build files are incrementally re-compiled when modified, and modules are 
-lazily loaded and initialized only when needed. So you are encouraged to break up your project 
+or resource usage, build files are incrementally re-compiled when modified, and modules are
+lazily loaded and initialized only when needed. So you are encouraged to break up your project
 into modules to manage the layering of your codebase or benefit from parallelism.
 
-== Selective Execution
-
-
-include::partial$example/depth/large/9-selective-execution.adoc[]
-
-== Multi-file Builds
-
-include::partial$example/depth/large/10-multi-file-builds.adoc[]
-
-== Helper Files
-
-include::partial$example/depth/large/11-helper-files.adoc[]
-
-== Legacy `.sc` extension
-
-include::partial$example/depth/large/12-helper-files-sc.adoc[]
+Apart from Mill's basic scalability and performance, Mill also comes with many features
+that can be utilized to help you manage the build system of a large project or codebase:
 
+* xref:large/selective-execution.adoc[]
 
+* xref:large/multi-file-builds.adoc[]
diff --git a/docs/modules/ROOT/pages/large/multi-file-builds.adoc b/docs/modules/ROOT/pages/large/multi-file-builds.adoc
@@ -0,0 +1,16 @@
+= Multi-File Builds
+:page-aliases: Structuring_Large_Builds.adoc
+
+include::partial$gtag-config.adoc[]
+
+include::partial$example/large/multi/10-multi-file-builds.adoc[]
+
+== Helper Files
+
+include::partial$example/large/multi/11-helper-files.adoc[]
+
+== Legacy `.sc` extension
+
+include::partial$example/large/multi/12-helper-files-sc.adoc[]
+
+
diff --git a/docs/modules/ROOT/pages/large/selective-execution.adoc b/docs/modules/ROOT/pages/large/selective-execution.adoc
@@ -0,0 +1,44 @@
+= Selective Execution
+
+include::partial$gtag-config.adoc[]
+
+
+include::partial$example/large/selective/9-selective-execution.adoc[]
+
+
+== Reproducibility and Determinism
+
+Selective execution relies on the inputs to your project being deterministic
+and reproducible, except for the code changes between the two versions, so that
+Mill can compare the state of the build inputs before and after and only run
+tasks downstream of those that changed. This is usually the case, but there are
+some subtleties to be aware of:
+
+- *Dynamic `Task.Input` tasks that capture Git metadata must be disabled*, e.g. those using
+  https://github.com/lefou/mill-vcs-version[mill-vcs-version]. The easiest way to do
+  this is to guard such dynamic inputs on an environment variable, such that
+  in most scenarios they return a constant `"SNAPSHOT"` string, and only when
+  necessary do you pass in the environment variable to compute a real version (e.g.
+  during publishing).
+
+```scala
+def myProjectVersion: T[String] = Task.Input {
+  if (Task.env.contains("MY_PROJECT_STABLE_VERSION")) VcsVersion.calcVcsState(Task.log).format()
+  else "SNAPSHOT"
+}
+```
+
+- *The filesystem layout and position of the before/after codebases must be exactly
+  the same*. This is not an issue when running `selective.prepare`/`selective.run` on
+  the same folder on one machine, but if the two calls are run on separate machines
+  you need to make sure the directory path is the same.
+
+- *You must use the same Operating System and Filesystem*, as differences there will
+  cause the filesystem signatures to change and thus spuriously trigger downstream tasks.
+  e.g. you cannot run `selective.prepare` on a Windows machine and `selective.run` on Linux.
+
+- *Filesystem permissions must be preserved before/after*. e.g. running `selective.{prepare,run}`
+  on different GitHub Actions machines sharing artifacts can cause issues as
+  `upload-artifact`/`download-artifact` https://github.com/actions/download-artifact#permission-loss[does not preserve filesystem permissions].
+  If this is an issue, you can run `chmod -R 777 .` before each of `selective.{prepare,run}`
+  to ensure they have the exact same filesystem permissions.
diff --git a/docs/modules/ROOT/pages/migrating/migrating.adoc b/docs/modules/ROOT/pages/migrating/migrating.adoc
@@ -283,7 +283,7 @@ to see which ones may help:
 
 * xref:fundamentals/modules.adoc#_trait_modules[Trait Modules] to centralize common config
 
-* xref:depth/large-builds.adoc#_multi_file_builds[Multi-File Builds] to let you co-locate
+* xref:large/multi-file-builds.adoc[Multi-File Builds] to let you co-locate
   build logic and the code being built
 
 * xref:extending/writing-plugins.adoc[Writing and Publishing your own Mill Plugins]

diff --git a/...rge/10-multi-file-builds/bar/package.mill → ...lti/10-multi-file-builds/bar/package.mill b/...rge/10-multi-file-builds/bar/package.mill → ...lti/10-multi-file-builds/bar/package.mill
diff --git a/...-builds/bar/qux/mymodule/src/BarQux.scala → ...-builds/bar/qux/mymodule/src/BarQux.scala b/...-builds/bar/qux/mymodule/src/BarQux.scala → ...-builds/bar/qux/mymodule/src/BarQux.scala
diff --git a/...10-multi-file-builds/bar/qux/package.mill → ...10-multi-file-builds/bar/qux/package.mill b/...10-multi-file-builds/bar/qux/package.mill → ...10-multi-file-builds/bar/qux/package.mill
diff --git a/...pth/large/10-multi-file-builds/build.mill → ...rge/multi/10-multi-file-builds/build.mill b/...pth/large/10-multi-file-builds/build.mill → ...rge/multi/10-multi-file-builds/build.mill
diff --git a/...rge/10-multi-file-builds/foo/package.mill → ...lti/10-multi-file-builds/foo/package.mill b/...rge/10-multi-file-builds/foo/package.mill → ...lti/10-multi-file-builds/foo/package.mill
diff --git a/...ge/10-multi-file-builds/foo/src/Foo.scala → ...ti/10-multi-file-builds/foo/src/Foo.scala b/...ge/10-multi-file-builds/foo/src/Foo.scala → ...ti/10-multi-file-builds/foo/src/Foo.scala
diff --git a/...le/depth/large/11-helper-files/build.mill → ...le/large/multi/11-helper-files/build.mill b/...le/depth/large/11-helper-files/build.mill → ...le/large/multi/11-helper-files/build.mill
diff --git a/...th/large/11-helper-files/foo/package.mill → ...ge/multi/11-helper-files/foo/package.mill b/...th/large/11-helper-files/foo/package.mill → ...ge/multi/11-helper-files/foo/package.mill
diff --git a/...h/large/11-helper-files/foo/src/Foo.scala → ...e/multi/11-helper-files/foo/src/Foo.scala b/...h/large/11-helper-files/foo/src/Foo.scala → ...e/multi/11-helper-files/foo/src/Foo.scala
diff --git a/...h/large/11-helper-files/foo/versions.mill → ...e/multi/11-helper-files/foo/versions.mill b/...h/large/11-helper-files/foo/versions.mill → ...e/multi/11-helper-files/foo/versions.mill
diff --git a/...epth/large/11-helper-files/src/Main.scala → ...arge/multi/11-helper-files/src/Main.scala b/...epth/large/11-helper-files/src/Main.scala → ...arge/multi/11-helper-files/src/Main.scala
diff --git a/...ple/depth/large/11-helper-files/util.mill → ...ple/large/multi/11-helper-files/util.mill b/...ple/depth/large/11-helper-files/util.mill → ...ple/large/multi/11-helper-files/util.mill
diff --git a/...e/depth/large/12-helper-files-sc/build.sc → ...e/large/multi/12-helper-files-sc/build.sc b/...e/depth/large/12-helper-files-sc/build.sc → ...e/large/multi/12-helper-files-sc/build.sc
diff --git a/...h/large/12-helper-files-sc/foo/package.sc → ...e/multi/12-helper-files-sc/foo/package.sc b/...h/large/12-helper-files-sc/foo/package.sc → ...e/multi/12-helper-files-sc/foo/package.sc
diff --git a/...arge/12-helper-files-sc/foo/src/Foo.scala → ...ulti/12-helper-files-sc/foo/src/Foo.scala b/...arge/12-helper-files-sc/foo/src/Foo.scala → ...ulti/12-helper-files-sc/foo/src/Foo.scala
diff --git a/.../large/12-helper-files-sc/foo/versions.sc → .../multi/12-helper-files-sc/foo/versions.sc b/.../large/12-helper-files-sc/foo/versions.sc → .../multi/12-helper-files-sc/foo/versions.sc
diff --git a/...h/large/12-helper-files-sc/src/Main.scala → ...e/multi/12-helper-files-sc/src/Main.scala b/...h/large/12-helper-files-sc/src/Main.scala → ...e/multi/12-helper-files-sc/src/Main.scala
diff --git a/...le/depth/large/12-helper-files-sc/util.sc → ...le/large/multi/12-helper-files-sc/util.sc b/...le/depth/large/12-helper-files-sc/util.sc → ...le/large/multi/12-helper-files-sc/util.sc
diff --git a/...-helper-files-mill-scala/build.mill.scala → ...-helper-files-mill-scala/build.mill.scala b/...-helper-files-mill-scala/build.mill.scala → ...-helper-files-mill-scala/build.mill.scala
diff --git a/...r-files-mill-scala/foo/package.mill.scala → ...r-files-mill-scala/foo/package.mill.scala b/...r-files-mill-scala/foo/package.mill.scala → ...r-files-mill-scala/foo/package.mill.scala
diff --git a/...helper-files-mill-scala/foo/src/Foo.scala → ...helper-files-mill-scala/foo/src/Foo.scala b/...helper-files-mill-scala/foo/src/Foo.scala → ...helper-files-mill-scala/foo/src/Foo.scala
diff --git a/...-files-mill-scala/foo/versions.mill.scala → ...-files-mill-scala/foo/versions.mill.scala b/...-files-mill-scala/foo/versions.mill.scala → ...-files-mill-scala/foo/versions.mill.scala
diff --git a/...13-helper-files-mill-scala/src/Main.scala → ...13-helper-files-mill-scala/src/Main.scala b/...13-helper-files-mill-scala/src/Main.scala → ...13-helper-files-mill-scala/src/Main.scala
diff --git a/...3-helper-files-mill-scala/util.mill.scala → ...3-helper-files-mill-scala/util.mill.scala b/...3-helper-files-mill-scala/util.mill.scala → ...3-helper-files-mill-scala/util.mill.scala
diff --git a/...-selective-execution/bar/src/bar/Bar.java → ...-selective-execution/bar/src/bar/Bar.java b/...-selective-execution/bar/src/bar/Bar.java → ...-selective-execution/bar/src/bar/Bar.java
diff --git a/...-execution/bar/test/src/bar/BarTests.java → ...-execution/bar/test/src/bar/BarTests.java b/...-execution/bar/test/src/bar/BarTests.java → ...-execution/bar/test/src/bar/BarTests.java
diff --git a/...th/large/9-selective-execution/build.mill → ...elective/9-selective-execution/build.mill b/...th/large/9-selective-execution/build.mill → ...elective/9-selective-execution/build.mill
@@ -22,7 +22,7 @@
 // ```bash
 // > git checkout main # start from the target branch of the PR
 //
-// > ./mill selective.prepare __.test
+// > ./mill selective.prepare
 //
 // > git checkout pull-request-branch # go to the pull request branch
 //
@@ -105,3 +105,4 @@ Test run bar.BarTests finished: 0 failed, 0 ignored, 1 total, ...
 // tasks non-selectively, which is convenient if you want to conditionally disable selective
 // execution (e.g. perhaps you want to perform selective execution on pre-merge on pull
 // requests but not post-merge on the main branch)
+//
diff --git a/...-selective-execution/foo/src/foo/Foo.java → ...-selective-execution/foo/src/foo/Foo.java b/...-selective-execution/foo/src/foo/Foo.java → ...-selective-execution/foo/src/foo/Foo.java
diff --git a/...-execution/foo/test/src/bar/FooTests.java → ...-execution/foo/test/src/bar/FooTests.java b/...-execution/foo/test/src/bar/FooTests.java → ...-execution/foo/test/src/bar/FooTests.java
diff --git a/...-selective-execution/qux/src/qux/Qux.java → ...-selective-execution/qux/src/qux/Qux.java b/...-selective-execution/qux/src/qux/Qux.java → ...-selective-execution/qux/src/qux/Qux.java
diff --git a/...-execution/qux/test/src/qux/QuxTests.java → ...-execution/qux/test/src/qux/QuxTests.java b/...-execution/qux/test/src/qux/QuxTests.java → ...-execution/qux/test/src/qux/QuxTests.java
diff --git a/example/package.mill b/example/package.mill
@@ -89,11 +89,14 @@ object `package` extends RootModule with Module {
 
   object depth extends Module {
 
-    object large extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "large"))
-
     object sandbox extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "sandbox"))
     object javahome extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "javahome"))
   }
+  object large extends Module {
+
+    object selective extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "selective"))
+    object multi extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "multi"))
+  }
 
   object extending extends Module {
     object imports extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "imports"))

diff --git a/kotlinlib/package.mill b/kotlinlib/package.mill
@@ -16,6 +16,9 @@ object `package` extends RootModule with build.MillPublishScalaModule with Build
   def buildInfoObjectName = "Versions"
   def buildInfoMembers = Seq(
     BuildInfo.Value("kotlinVersion", build.Deps.kotlinVersion, "Version of Kotlin"),
+    BuildInfo.Value("kotlinVersion2", build.Deps.kotlinVersion, "Version of Kotlin"),
+    BuildInfo.Value("kotlinVersion3", build.Deps.kotlinVersion, "Version of Kotlin"),
+    BuildInfo.Value("kotlinVersion4", build.Deps.kotlinVersion, "Version of Kotlin"),
     BuildInfo.Value("koverVersion", build.Deps.RuntimeDeps.koverVersion, "Version of Kover."),
     BuildInfo.Value("ktfmtVersion", build.Deps.RuntimeDeps.ktfmt.version, "Version of Ktfmt."),
     BuildInfo.Value("ktlintVersion", build.Deps.RuntimeDeps.ktlint.version, "Version of ktlint."),

diff --git a/kotlinlib/worker/src/mill/kotlinlib/worker/api/KotlinWorker.scala b/kotlinlib/worker/src/mill/kotlinlib/worker/api/KotlinWorker.scala
@@ -10,7 +10,7 @@ import mill.api.{Ctx, Result}
 trait KotlinWorker {
 
   def compile(target: KotlinWorkerTarget, args: Seq[String])(implicit ctx: Ctx): Result[Unit]
-
+  val x = 1
 }
 
 sealed class KotlinWorkerTarget

diff --git a/main/client/src/mill/main/client/OutFiles.java b/main/client/src/mill/main/client/OutFiles.java
@@ -73,4 +73,7 @@ public class OutFiles {
    * root tasks changed so Mill can decide which tasks to execute.
    */
   public static final String millSelectiveExecution = "mill-selective-execution.json";
+
+  public static final String millDependencyForest = "mill-dependency-forest.json";
+  public static final String millInvalidationForest = "mill-invalidation-forest.json";
 }
diff --git a/main/codesig/src/Logger.scala b/main/codesig/src/Logger.scala
@@ -1,18 +1,29 @@
 package mill.codesig
 
-class Logger(logFolder: Option[os.Path]) {
+class Logger(mandatoryLogFolder: os.Path, logFolder: Option[os.Path]) {
   logFolder.foreach(os.remove.all(_))
+  os.remove.all(mandatoryLogFolder)
   private var count = 1
 
+  def log0[T: upickle.default.Writer](
+      p: os.Path,
+      res: sourcecode.Text[T],
+      prefix: String = ""
+  ): Unit = {
+    os.write(
+      p / s"$prefix${res.source}.json",
+      upickle.default.stream(res.value, indent = 4),
+      createFolders = true
+    )
+    count += 1
+  }
   def log[T: upickle.default.Writer](t: => sourcecode.Text[T], prefix: String = ""): Unit = {
-    lazy val res = t
-    logFolder.foreach { p =>
-      os.write(
-        p / s"$count-$prefix${res.source}.json",
-        upickle.default.stream(res.value, indent = 4),
-        createFolders = true
-      )
-      count += 1
-    }
+    logFolder.foreach(log0(_, t, s"$count-$prefix"))
+  }
+  def mandatoryLog[T: upickle.default.Writer](
+      t: => sourcecode.Text[T],
+      prefix: String = ""
+  ): Unit = {
+    log0(mandatoryLogFolder, t, prefix)
   }
 }
diff --git a/main/codesig/src/ReachabilityAnalysis.scala b/main/codesig/src/ReachabilityAnalysis.scala
@@ -1,7 +1,7 @@
 package mill.codesig
-import mill.util.Tarjans
+import mill.util.{SpanningForest, Tarjans}
 import upickle.default.{Writer, writer}
-import JvmModel._
+import JvmModel.*
 
 import scala.collection.immutable.SortedMap
 import ujson.Obj
@@ -77,6 +77,7 @@ class CallGraphAnalysis(
     .collect { case (CallGraphAnalysis.LocalDef(d), v) => (d.toString, v) }
     .to(SortedMap)
 
+  logger.mandatoryLog(transitiveCallGraphHashes0)
   logger.log(transitiveCallGraphHashes)
 
   lazy val spanningInvalidationForest: Obj = prevTransitiveCallGraphHashesOpt() match {
@@ -90,7 +91,7 @@ class CallGraphAnalysis(
     case None => ujson.Obj()
   }
 
-  logger.log(spanningInvalidationForest)
+  logger.mandatoryLog(spanningInvalidationForest)
 }
 
 object CallGraphAnalysis {
@@ -121,20 +122,22 @@ object CallGraphAnalysis {
       .filter { nodeIndex =>
         val currentValue = transitiveCallGraphHashes0Map(indexToNodes(nodeIndex))
         val prevValue = prevTransitiveCallGraphHashes.get(indexToNodes(nodeIndex).toString)
-
         !prevValue.contains(currentValue)
       }
       .toSet
 
-    def spanningTreeToJsonTree(node: SpanningForest.Node): ujson.Obj = {
-      ujson.Obj.from(
-        node.values.map { case (k, v) =>
-          indexToNodes(k).toString -> spanningTreeToJsonTree(v)
-        }
-      )
-    }
+    val reverseGraphMap = indexGraphEdges
+      .zipWithIndex
+      .flatMap { case (vs, k) => vs.map((_, k)) }
+      .groupMap(_._1)(_._2)
+
+    val reverseGraphEdges =
+      indexGraphEdges.indices.map(reverseGraphMap.getOrElse(_, Array())).toArray
 
-    spanningTreeToJsonTree(SpanningForest.apply(indexGraphEdges, nodesWithChangedHashes))
+    SpanningForest.spanningTreeToJsonTree(
+      SpanningForest.apply(reverseGraphEdges, nodesWithChangedHashes, false),
+      k => indexToNodes(k).toString
+    )
   }
 
   def indexGraphEdges(

diff --git a/main/codesig/src/ResolvedCalls.scala b/main/codesig/src/ResolvedCalls.scala
@@ -1,6 +1,7 @@
 package mill.codesig
 import JvmModel._
 import JType.{Cls => JCls}
+import mill.util.SpanningForest.breadthFirst
 import upickle.default.{ReadWriter, macroRW}
 
 case class ResolvedCalls(
@@ -188,20 +189,4 @@ object ResolvedCalls {
     )
   }
 
-  def breadthFirst[T](start: IterableOnce[T])(edges: T => IterableOnce[T]): Seq[T] = {
-    val seen = collection.mutable.Set.empty[T]
-    val seenList = collection.mutable.Buffer.empty[T]
-    val queued = collection.mutable.Queue.from(start)
-
-    while (queued.nonEmpty) {
-      val current = queued.dequeue()
-      seen.add(current)
-      seenList.append(current)
-
-      for (next <- edges(current).iterator) {
-        if (!seen.contains(next)) queued.enqueue(next)
-      }
-    }
-    seenList.toSeq
-  }
 }
diff --git a/main/codesig/test/src/Util.scala → main/codesig/test/src/TestUtil.scala b/main/codesig/test/src/Util.scala → main/codesig/test/src/TestUtil.scala
@@ -19,7 +19,7 @@ object TestUtil {
           .map(os.Path(_))
       ),
       (_, _) => false,
-      new Logger(Some(testLogFolder)),
+      new Logger(testLogFolder, Some(testLogFolder)),
       () => None
     )
   }