From ded5dd994d836aa8c89b5d5fede0e3e274922b86 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 18 Dec 2024 14:37:25 +0800 Subject: [PATCH 1/3] Update index.adoc --- blog/modules/ROOT/pages/index.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blog/modules/ROOT/pages/index.adoc b/blog/modules/ROOT/pages/index.adoc index a38979530ff..42571d4fd2f 100644 --- a/blog/modules/ROOT/pages/index.adoc +++ b/blog/modules/ROOT/pages/index.adoc @@ -1,4 +1,4 @@ -# Mill Build Tool Development Blog +# Mill Build Tool Engineering Blog include::mill:ROOT:partial$gtag-config.adoc[] From d3e5d50c29acf728026a1fe8b25d9e2697698e77 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 18 Dec 2024 15:45:24 +0800 Subject: [PATCH 2/3] Revert "Always check `available()` in `InputPumper` to avoid burning CPU (#4095)" (#4159) Fixes https://github.com/com-lihaoyi/mill/issues/4158 This hits some JVM limitations that significantly slow down the process exit, unnecessarily. Added a comment to the code so we don't forget again --- main/client/src/mill/main/client/InputPumper.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/main/client/src/mill/main/client/InputPumper.java b/main/client/src/mill/main/client/InputPumper.java index bae7005a5a8..cb77c67d5f0 100644 --- a/main/client/src/mill/main/client/InputPumper.java +++ b/main/client/src/mill/main/client/InputPumper.java @@ -36,7 +36,12 @@ public void run() { byte[] buffer = new byte[1024]; try { while (running) { - if (!runningCheck.getAsBoolean()) running = false; + if (!runningCheck.getAsBoolean()) { + running = false; + // We need to check `.available` and avoid calling `.read`, because if we call `.read` + // and there is nothing to read, it can unnecessarily delay the JVM exit by 350ms + // https://stackoverflow.com/questions/48951611/blocking-on-stdin-makes-java-process-take-350ms-more-to-exit + } else if (checkAvailable && src.available() == 0) Thread.sleep(1); else { int n; try { From 
714cc0f90389bd0ba8fa43a8524a629066767ada Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Wed, 18 Dec 2024 16:11:21 +0800 Subject: [PATCH 3/3] Mill Process Architecture docs (#4160) --- docs/modules/ROOT/nav.adoc | 1 + .../pages/depth/process-architecture.adoc | 167 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 docs/modules/ROOT/pages/depth/process-architecture.adoc diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 5e7dd931592..16f0eac9fb8 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -101,6 +101,7 @@ * Mill In Depth ** xref:depth/sandboxing.adoc[] ** xref:depth/execution-model.adoc[] +** xref:depth/process-architecture.adoc[] ** xref:depth/design-principles.adoc[] ** xref:depth/why-scala.adoc[] // Reference pages that a typical user would not typically read top-to-bottom, diff --git a/docs/modules/ROOT/pages/depth/process-architecture.adoc b/docs/modules/ROOT/pages/depth/process-architecture.adoc new file mode 100644 index 00000000000..436be147912 --- /dev/null +++ b/docs/modules/ROOT/pages/depth/process-architecture.adoc @@ -0,0 +1,167 @@ += The Mill Process Architecture + +include::partial$gtag-config.adoc[] + +This page goes into detail of how the Mill process and application is structured. 
+At a high-level, a simplified version of the main components and data-flows within +a running Mill process is shown below: + +```graphviz +digraph G { + rankdir=LR + node [shape=box width=0 height=0 style=filled fillcolor=white] + bgcolor=transparent + + "client-stdin" [penwidth=0] + "client-stdout" [penwidth=0] + "client-stderr" [penwidth=0] + "client-exit" [penwidth=0] + "client-args" [penwidth=0] + subgraph cluster_client { + label = "mill client"; + "Socket" + "MillClientMain" + } + "client-stdin" -> "Socket" + "client-stderr" -> "Socket" [dir=back] + "client-stdout" -> "Socket" [dir=back] + "client-args" -> "MillClientMain" + "client-exit" -> "MillClientMain" [dir=back] + "MillClientMain" -> "runArgs" + subgraph cluster_out { + label = "out/"; + + + subgraph cluster_mill_server_folder { + label = "mill-server/"; + "socketPort" [penwidth=0] + "exitCode" [penwidth=0] + "runArgs" [penwidth=0] + } + subgraph cluster_out_foo_folder { + label = "foo/"; + "compile.json" [penwidth=0] + "compile.dest" [penwidth=0] + "assembly.json" [penwidth=0] + "assembly.dest" [penwidth=0] + + } + } + + + subgraph cluster_server { + label = "mill server"; + "PromptLogger" + "MillServerMain" + "Evaluator" + "ServerSocket" + + "server-stdout" [penwidth=0] + "server-stderr" [penwidth=0] + subgraph cluster_classloder { + label = "URLClassLoader"; + subgraph cluster_build { + style=dashed + label = "build"; + subgraph cluster_foo { + style=dashed + label = "foo"; + + "foo.sources" -> "foo.compile" -> "foo.classPath" -> "foo.assembly" + "foo.resources" -> "foo.assembly" + "foo.classPath" + } + } + + } + } + + + "runArgs" -> "MillServerMain" + "MillServerMain" -> "Evaluator" [dir=both] + "ServerSocket" -> "PromptLogger" [dir=back] + "exitCode" -> "MillServerMain" [dir=back] + "MillClientMain" -> "exitCode" [dir=back] + "Socket" -> "socketPort" [dir=both] + "socketPort" -> "ServerSocket" [dir=both] + + "PromptLogger" -> "server-stderr" [dir=back] + "PromptLogger" -> "server-stdout" 
[dir=back] + "compile.dest" -> "foo.compile" [dir=both] + "compile.json" -> "foo.compile" [dir=both] + + "assembly.dest" -> "foo.assembly" [dir=both] + "assembly.json" -> "foo.assembly" [dir=both] +} +``` + + +== The Mill Client + +The Mill client is a small Java application that is responsible for launching +and delegating work to the Mill server, a long-lived process. Each `./mill` +command spawns a new Mill client, but generally re-uses the same Mill server where +possible in order to reduce startup overhead and to allow the Mill server +process to warm up and provide good performance. + +* The Mill client takes all the inputs of a typical command-line application - +stdin and command-line arguments - and proxies them to the long-lived Mill +server process. + +* It then takes the outputs from the Mill server - stdout, stderr, +and finally the exitcode - and proxies those back to the calling process or terminal. + +In this way, the Mill client acts and behaves for almost all intents and purposes +as a normal CLI application, except it is really a thin wrapper around logic that +is actually running in the long-lived Mill server. + +The Mill server sometimes is shut down and needs to be restarted, e.g. if the Mill +version changed, or the user used `Ctrl-C` to interrupt the ongoing computation. +In such a scenario, the Mill client will automatically restart the server the next +time it is run, so apart from a slight performance penalty from starting a "cold" +Mill server such shutdowns and restarts should be mostly invisible to the user. + +== The Mill Server + +The Mill server is a long-lived process that the Mill client spawns. +Only one Mill server should be running in a codebase at a time, and each server +takes a filelock at startup time to enforce this mutual exclusion. 
+ +The Mill server compiles your `build.mill` and `package.mill`, spawns a +`URLClassLoader` containing the compiled classfiles, and uses that to instantiate +the various xref:fundamentals/modules.adoc[] and xref:fundamentals/tasks.adoc[] +dynamically in-memory. These are then used by the `Evaluator`, which resolves, +plans, and executes the tasks specified by the given `runArgs`. + +During execution, both standard output +and standard error are captured and forwarded to the `PromptLogger`. +`PromptLogger` annotates the output stream with the line-prefixes, prompt, and ANSI +terminal commands necessary to generate the dynamic prompt, and then forwards both +streams multi-plexed over a single socket stream back to the Mill client. The client +then de-multiplexes the combined stream to split it back into output and error, which +are then both forwarded to the process or terminal that invoked the Mill client. + +Lastly, when the Mill server completes its tasks, it writes the `exitCode` to a file +that is then propagated back to the Mill client. The Mill client terminates with this +exit code, but the Mill server remains alive and ready to serve the next Mill +client that connects to it. + +For a more detailed discussion of what exactly goes into "execution", see +xref:depth/execution-model.adoc[]. + + +== The Out Folder + +The `out/` directory is where most of Mill's state lives on disk, including build-task state +such as the `foo/compile.json` metadata cache for `foo.compile`, or the `foo/compile.dest` +which stores any generated files or binaries. It also contains the `mill-server/` folder which +is used to pass data back and forth between the client and server: the `runArgs`, `exitCode`, +etc. + +Each task during evaluation reads and writes from its own designated paths in the `out/` +folder. 
Each task's files are not touched by any other tasks, nor are they used in the rest +of the Mill architecture: they are solely meant to serve each task's caching and filesystem +needs. + +More documentation on what the `out/` directory contains and how to make use of it can be +found at xref:fundamentals/out-dir.adoc[].