From 30943d1bd319323bee7663c8f3f57b33d401ec6b Mon Sep 17 00:00:00 2001 From: Son Luong Ngoc Date: Wed, 19 Apr 2023 19:13:36 +0200 Subject: [PATCH] go_tool_binary: set GOMAXPROCS to 1 (#3536) On darwin, in repositories with many transitions configured such as rules_go.git, the action GoToolchainBinaryBuild could be run multiple times in parallel by Bazel. Each of these actions would execute a command like this in a sandbox: ``` $GO build -o $OUTPUT -trimpath \ go/tools/builders/ar.go \ go/tools/builders/asm.go \ go/tools/builders/builder.go \ go/tools/builders/cgo2.go \ go/tools/builders/compilepkg.go \ go/tools/builders/cover.go \ go/tools/builders/edit.go \ go/tools/builders/embedcfg.go \ go/tools/builders/env.go \ go/tools/builders/filter.go \ go/tools/builders/filter_buildid.go \ go/tools/builders/flags.go \ go/tools/builders/generate_nogo_main.go \ go/tools/builders/generate_test_main.go \ go/tools/builders/importcfg.go \ go/tools/builders/link.go \ go/tools/builders/pack.go \ go/tools/builders/read.go \ go/tools/builders/replicate.go \ go/tools/builders/stdlib.go \ go/tools/builders/stdliblist.go \ go/tools/builders/path.go ``` Then, each `go build` process would execute multiple `go tools compile` command up to the number of max Go routines it is set to. The default max number of Go routines for each `go build` process is the number of core on the machine executing the action. On a local build, this means that we could have up to n x n number of goroutines running in parallel when combining the asynchronousity of both Bazel and `go build`. After upgrading to Go 1.20.x, we observed that such parallelism could get a local MacOS machine run into a deadlock on the OS-level, halting spawn of new processes such as `git config`, or new Chrome tab browser. Typical sampling of a `go build` process stuck in deadlock would result in the following call graphs: ``` 2430 Thread_44978 DispatchQueue_1: com.apple.main-thread (serial) + 2430 ??? (in ) [0x1358] + 2430 runtime.asmcgocall.abi0 (in go) + 124 [0x100b702ac] + 2430 runtime.syscall6.abi0 (in go) + 56 [0x100b71a98] + 2430 __wait4_nocancel (in libsystem_kernel.dylib) + 8 [0x1a17a04f4] 2430 Thread_44999 + 2430 runtime.asmcgocall.abi0 (in go) + 201 [0x100b702f9] + 2430 runtime.pthread_cond_timedwait_relative_np_trampoline.abi0 (in go) + 28 [0x100b717ec] + 2430 _pthread_cond_wait (in libsystem_pthread.dylib) + 1276 [0x1a17d45a0] + 2430 __psynch_cvwait (in libsystem_kernel.dylib) + 8 [0x1a1797710] 2430 Thread_45038 + 2430 runtime.kevent_trampoline.abi0 (in go) + 40 [0x100b71518] + 2430 kevent (in libsystem_kernel.dylib) + 8 [0x1a179a060] ``` We could not reproduce outside of Bazel (using bash & to parallelize `go build`). We also could not reproduce this issue with Go 1.19 releases. Let's apply a rudimentry workaround by limitting the concurrency of `go build` process by setting it's max Go routines to 1. This should make this and future race conditions happen a lot less on all platforms. By doing this, we rely more on Bazel and less on Go runtime to saturate our CPU resources. This change should not affect remote execution as the default ResourceSet for each action is set at 1 CPU and 250MB RAM. --- go/private/rules/binary.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/private/rules/binary.bzl b/go/private/rules/binary.bzl index 91748eda08..51530b04ca 100644 --- a/go/private/rules/binary.bzl +++ b/go/private/rules/binary.bzl @@ -430,7 +430,7 @@ def _go_tool_binary_impl(ctx): if sdk.goos == "windows": gopath = ctx.actions.declare_directory("gopath") gocache = ctx.actions.declare_directory("gocache") - cmd = "@echo off\nset GOCACHE=%cd%\\{gocache}\nset GOPATH=%cd%\\{gopath}\n{go} build -o {out} -trimpath {srcs}".format( + cmd = "@echo off\nset GOMAXPROCS=1\nset GOCACHE=%cd%\\{gocache}\nset GOPATH=%cd%\\{gopath}\n{go} build -o {out} -trimpath {srcs}".format( gopath = gopath.path, gocache = gocache.path, go = sdk.go.path.replace("/", "\\"), @@ -450,7 +450,7 @@ def _go_tool_binary_impl(ctx): ) else: # Note: GOPATH is needed for Go 1.16. - cmd = "GOCACHE=$(mktemp -d) GOPATH=$(mktemp -d) {go} build -o {out} -trimpath {srcs}".format( + cmd = "GOMAXPROCS=1 GOCACHE=$(mktemp -d) GOPATH=$(mktemp -d) {go} build -o {out} -trimpath {srcs}".format( go = sdk.go.path, out = out.path, srcs = " ".join([f.path for f in ctx.files.srcs]),