diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..90f14cb --- /dev/null +++ b/.bazelrc @@ -0,0 +1,4 @@ +# Print results from tests +build --test_output=all +build --test_summary=terse +build --incompatible_default_to_explicit_init_py diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..19b860c --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +6.4.0 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 56eb1a4..eac31b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,4 +32,4 @@ jobs: env: # Bazelisk will download bazel to here XDG_CACHE_HOME: ~/.cache/bazel-repo - run: bazel test //... + run: bazel test //... --jobs 2 # limit number of jobs to prevent broken pipe error diff --git a/WORKSPACE b/WORKSPACE index 75e5a9f..5f26926 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -16,28 +16,21 @@ workspace(name = "com_github_benchsci_rules_python_gazelle") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -http_archive( - name = "rules_python", - sha256 = "a30abdfc7126d497a7698c29c46ea9901c6392d6ed315171a6df5ce433aa4502", - strip_prefix = "rules_python-0.6.0", - url = "https://github.com/bazelbuild/rules_python/archive/0.6.0.tar.gz", -) - http_archive( name = "io_bazel_rules_go", - sha256 = "278b7ff5a826f3dc10f04feaf0b70d48b68748ccd512d7f98bf442077f043fe3", + sha256 = "91585017debb61982f7054c9688857a2ad1fd823fc3f9cb05048b0025c47d023", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.41.0/rules_go-v0.41.0.zip", - "https://github.com/bazelbuild/rules_go/releases/download/v0.41.0/rules_go-v0.41.0.zip", + "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.42.0/rules_go-v0.42.0.zip", + "https://github.com/bazelbuild/rules_go/releases/download/v0.42.0/rules_go-v0.42.0.zip", ], ) http_archive( name = "bazel_gazelle", - sha256 = "d3fa66a39028e97d76f9e2db8f1b0c11c099e8e01bf363a923074784e451f809", + sha256 = 
"b7387f72efb59f876e4daae42f1d3912d0d45563eac7cb23d1de0b094ab588cf", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.33.0/bazel-gazelle-v0.33.0.tar.gz", - "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.33.0/bazel-gazelle-v0.33.0.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.34.0/bazel-gazelle-v0.34.0.tar.gz", + "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.34.0/bazel-gazelle-v0.34.0.tar.gz", ], ) @@ -53,3 +46,20 @@ go_rules_dependencies() go_register_toolchains(version = "1.20.5") gazelle_dependencies() + +http_archive( + name = "rules_python", + sha256 = "9d04041ac92a0985e344235f5d946f71ac543f1b1565f2cdbc9a2aaee8adf55b", + strip_prefix = "rules_python-0.26.0", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.26.0/rules_python-0.26.0.tar.gz", +) + +load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") + +# Add python 3.11 toolchain +python_register_toolchains( + name = "python_3_11", + python_version = "3.11", +) + +py_repositories() diff --git a/gazelle/BUILD.bazel b/gazelle/BUILD.bazel index 94e6f35..2e0d08c 100644 --- a/gazelle/BUILD.bazel +++ b/gazelle/BUILD.bazel @@ -10,11 +10,13 @@ go_library( "generate.go", "kinds.go", "language.go", + "lifecycle.go", "parser.go", "resolve.go", "std_modules.go", "target.go", ], + embedsrcs = [":helper.zip"], # keep importpath = "github.com/benchsci/rules_python_gazelle/gazelle", visibility = ["//visibility:public"], deps = [ @@ -50,6 +52,25 @@ py_binary( visibility = ["//visibility:public"], ) +py_binary( + name = "helper", + srcs = [ + "__main__.py", + "parse.py", + "std_modules.py", + ], + # This is to make sure that the current directory is added to PYTHONPATH + imports = ["."], + main = "__main__.py", + visibility = ["//visibility:public"], +) + +filegroup( + name = "helper.zip", + srcs = [":helper"], + output_group = 
"python_zip_file", +) + TEST_DEPS = [ "@bazel_gazelle//testtools:go_default_library", "@com_github_emirpasic_gods//lists/singlylinkedlist", diff --git a/gazelle/__main__.py b/gazelle/__main__.py new file mode 100644 index 0000000..18bc1ca --- /dev/null +++ b/gazelle/__main__.py @@ -0,0 +1,32 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# parse.py is a long-living program that communicates over STDIN and STDOUT. +# STDIN receives parse requests, one per line. It outputs the parsed modules and +# comments from all the files from each request. 
+ +import sys + +import parse +import std_modules + +if __name__ == "__main__": + if len(sys.argv) < 2: + sys.exit("Please provide subcommand, either parse or std_modules") + if sys.argv[1] == "parse": + sys.exit(parse.main(sys.stdin, sys.stdout)) + elif sys.argv[1] == "std_modules": + sys.exit(std_modules.main(sys.stdin, sys.stdout)) + else: + sys.exit("Unknown subcommand: " + sys.argv[1]) diff --git a/gazelle/language.go b/gazelle/language.go index 877ac6d..3b13278 100644 --- a/gazelle/language.go +++ b/gazelle/language.go @@ -9,6 +9,7 @@ import ( type Python struct { Configurer Resolver + LifeCycleManager } // NewLanguage initializes a new Python that satisfies the language.Language diff --git a/gazelle/lifecycle.go b/gazelle/lifecycle.go new file mode 100644 index 0000000..6d628e9 --- /dev/null +++ b/gazelle/lifecycle.go @@ -0,0 +1,63 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package python + +import ( + "context" + _ "embed" + "github.com/bazelbuild/bazel-gazelle/language" + "log" + "os" +) + +var ( + //go:embed helper.zip + helperZip []byte + helperPath string +) + +type LifeCycleManager struct { + language.BaseLifecycleManager + pyzFilePath string +} + +func (l *LifeCycleManager) Before(ctx context.Context) { + helperPath = os.Getenv("GAZELLE_PYTHON_HELPER") + if helperPath == "" { + pyzFile, err := os.CreateTemp("", "python_zip_") + if err != nil { + log.Fatalf("failed to write parser zip: %v", err) + } + defer pyzFile.Close() + helperPath = pyzFile.Name() + l.pyzFilePath = helperPath + if _, err := pyzFile.Write(helperZip); err != nil { + log.Fatalf("cannot write %q: %v", helperPath, err) + } + } + startParserProcess(ctx) + startStdModuleProcess(ctx) +} + +func (l *LifeCycleManager) DoneGeneratingRules() { + shutdownParserProcess() +} + +func (l *LifeCycleManager) AfterResolvingDeps(ctx context.Context) { + shutdownStdModuleProcess() + if l.pyzFilePath != "" { + os.Remove(l.pyzFilePath) + } +} diff --git a/gazelle/modules_mapping/BUILD.bazel b/gazelle/modules_mapping/BUILD.bazel index 5788007..0ef8db8 100644 --- a/gazelle/modules_mapping/BUILD.bazel +++ b/gazelle/modules_mapping/BUILD.bazel @@ -3,6 +3,7 @@ load("@rules_python//python:defs.bzl", "py_binary", "py_test") py_binary( name = "generator", srcs = ["generator.py"], + imports = ["."], main = "generator.py", visibility = ["//visibility:public"], ) @@ -11,5 +12,13 @@ py_test( name = "test_generator", srcs = ["test_generator.py"], data = glob(["testdata/**"]), + imports = ["."], main = "test_generator.py", + deps = [":generator"], +) + +filegroup( + name = "distribution", + srcs = glob(["**"]), + visibility = ["//:__pkg__"], ) diff --git a/gazelle/modules_mapping/test_generator.py b/gazelle/modules_mapping/test_generator.py index 21b28ef..50200fc 100644 --- a/gazelle/modules_mapping/test_generator.py +++ b/gazelle/modules_mapping/test_generator.py @@ -1,7 +1,8 @@ import 
pathlib -from generator import Generator import unittest +from generator import Generator + class GeneratorTest(unittest.TestCase): def test_generator(self): @@ -28,7 +29,10 @@ def test_stub_generator(self): gen = Generator(None, None) mapping = gen.dig_wheel(whl) self.assertLessEqual( - {"django_types": "django_types",}.items(), mapping.items(), + { + "django_types": "django_types", + }.items(), + mapping.items(), ) diff --git a/gazelle/parse.py b/gazelle/parse.py index b5827ff..d81633a 100644 --- a/gazelle/parse.py +++ b/gazelle/parse.py @@ -9,6 +9,7 @@ import re import sys from io import BytesIO +from multiprocessing import cpu_count from tokenize import COMMENT, tokenize @@ -88,7 +89,9 @@ def parse(repo_root, rel_package_path, filename): def main(stdin, stdout): - with concurrent.futures.ProcessPoolExecutor() as executor: + with concurrent.futures.ProcessPoolExecutor( + max_workers=max(cpu_count() - 1, 1) + ) as executor: for parse_request in stdin: parse_request = json.loads(parse_request) repo_root = parse_request["repo_root"] diff --git a/gazelle/parser.go b/gazelle/parser.go index 8c916fc..f649677 100644 --- a/gazelle/parser.go +++ b/gazelle/parser.go @@ -3,6 +3,7 @@ package python import ( "bufio" "context" + _ "embed" "encoding/json" "fmt" "io" @@ -12,58 +13,52 @@ import ( "sort" "strings" "sync" - "time" - "github.com/bazelbuild/rules_go/go/tools/bazel" "github.com/emirpasic/gods/sets/treeset" godsutils "github.com/emirpasic/gods/utils" ) var ( - parserStdin io.Writer + parserCmd *exec.Cmd + parserStdin io.WriteCloser parserStdout io.Reader parserMutex sync.Mutex ) -func init() { - parseScriptRunfile, err := bazel.Runfile("gazelle/parse") - if err != nil { - log.Printf("failed to initialize parser: %v\n", err) - os.Exit(1) - } - - ctx := context.Background() - ctx, parserCancel := context.WithTimeout(ctx, time.Minute*5) - cmd := exec.CommandContext(ctx, parseScriptRunfile) - - cmd.Stderr = os.Stderr +func startParserProcess(ctx context.Context) { + // 
due to #691, we need a system interpreter to bootstrap, part of which is + // to locate the hermetic interpreter. + parserCmd = exec.CommandContext(ctx, "python3", helperPath, "parse") + parserCmd.Stderr = os.Stderr - stdin, err := cmd.StdinPipe() + stdin, err := parserCmd.StdinPipe() if err != nil { log.Printf("failed to initialize parser: %v\n", err) os.Exit(1) } parserStdin = stdin - stdout, err := cmd.StdoutPipe() + stdout, err := parserCmd.StdoutPipe() if err != nil { log.Printf("failed to initialize parser: %v\n", err) os.Exit(1) } parserStdout = stdout - if err := cmd.Start(); err != nil { + if err := parserCmd.Start(); err != nil { log.Printf("failed to initialize parser: %v\n", err) os.Exit(1) } +} - go func() { - defer parserCancel() - if err := cmd.Wait(); err != nil { - log.Printf("failed to wait for parser: %v\n", err) - os.Exit(1) - } - }() +func shutdownParserProcess() { + if err := parserStdin.Close(); err != nil { + fmt.Fprintf(os.Stderr, "error closing parser: %v", err) + } + + if err := parserCmd.Wait(); err != nil { + log.Printf("failed to wait for parser: %v\n", err) + } } // python3Parser implements a parser for Python files that extracts the modules diff --git a/gazelle/std_modules.go b/gazelle/std_modules.go index f7d0c24..8a016af 100644 --- a/gazelle/std_modules.go +++ b/gazelle/std_modules.go @@ -1,8 +1,23 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ package python import ( "bufio" "context" + _ "embed" "fmt" "io" "log" @@ -11,60 +26,54 @@ import ( "strconv" "strings" "sync" - "time" - - "github.com/bazelbuild/rules_go/go/tools/bazel" ) var ( - stdModulesStdin io.Writer + stdModulesCmd *exec.Cmd + stdModulesStdin io.WriteCloser stdModulesStdout io.Reader stdModulesMutex sync.Mutex stdModulesSeen map[string]struct{} ) -func init() { +func startStdModuleProcess(ctx context.Context) { stdModulesSeen = make(map[string]struct{}) - stdModulesScriptRunfile, err := bazel.Runfile("gazelle/std_modules") - if err != nil { - log.Printf("failed to initialize std_modules: %v\n", err) - os.Exit(1) - } - - ctx := context.Background() - ctx, stdModulesCancel := context.WithTimeout(ctx, time.Minute*5) - cmd := exec.CommandContext(ctx, stdModulesScriptRunfile) - - cmd.Stderr = os.Stderr - cmd.Env = []string{} + // due to #691, we need a system interpreter to bootstrap, part of which is + // to locate the hermetic interpreter. + stdModulesCmd = exec.CommandContext(ctx, "python3", helperPath, "std_modules") + stdModulesCmd.Stderr = os.Stderr + // All userland site-packages should be ignored.
+ stdModulesCmd.Env = []string{"PYTHONNOUSERSITE=1"} - stdin, err := cmd.StdinPipe() + stdin, err := stdModulesCmd.StdinPipe() if err != nil { log.Printf("failed to initialize std_modules: %v\n", err) os.Exit(1) } stdModulesStdin = stdin - stdout, err := cmd.StdoutPipe() + stdout, err := stdModulesCmd.StdoutPipe() if err != nil { log.Printf("failed to initialize std_modules: %v\n", err) os.Exit(1) } stdModulesStdout = stdout - if err := cmd.Start(); err != nil { + if err := stdModulesCmd.Start(); err != nil { log.Printf("failed to initialize std_modules: %v\n", err) os.Exit(1) } +} + +func shutdownStdModuleProcess() { + if err := stdModulesStdin.Close(); err != nil { + fmt.Fprintf(os.Stderr, "error closing std module: %v", err) + } - go func() { - defer stdModulesCancel() - if err := cmd.Wait(); err != nil { - log.Printf("failed to wait for std_modules: %v\n", err) - os.Exit(1) - } - }() + if err := stdModulesCmd.Wait(); err != nil { + log.Printf("failed to wait for std_modules: %v\n", err) + } } func isStdModule(m module) (bool, error) { diff --git a/gazelle/std_modules.py b/gazelle/std_modules.py index ccd1dcd..779a325 100644 --- a/gazelle/std_modules.py +++ b/gazelle/std_modules.py @@ -1,32 +1,44 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # std_modules.py is a long-living program that communicates over STDIN and # STDOUT. STDIN receives module names, one per line. 
For each module statement # it evaluates, it outputs true/false for whether the module is part of the # standard library or not. -import site +import os import sys - - -# Don't return any paths, all userland site-packages should be ignored. -def __override_getusersitepackages__(): - return "" - - -site.getusersitepackages = __override_getusersitepackages__ +from contextlib import redirect_stdout def is_std_modules(module): - try: - __import__(module, globals(), locals(), [], 0) - return True - except Exception: - return False + # If for some reason a module (such as pygame, see https://github.com/pygame/pygame/issues/542) + # prints to stdout upon import, + # the output of this script should still be parseable by golang. + # Therefore, redirect stdout while running the import. + with open(os.devnull, "w") as devnull, redirect_stdout(devnull): + try: + __import__(module, globals(), locals(), [], 0) + return True + except Exception: + return False def main(stdin, stdout): for module in stdin: module = module.strip() - # Don't print the boolean directly as it is captilized in Python. + # Don't print the boolean directly as it is capitalized in Python. print( "true" if is_std_modules(module) else "false", end="\n",