From 0ca263ec8c90e95a996aecfce4927dad8c2f0dc9 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 6 Mar 2024 10:51:04 -0800 Subject: [PATCH 1/5] Simplify --platform (-p) option After internal testing and review, we reached the conclusion that using -p to both define and filter platforms was too confusing and difficult to teach. Additionally, there are concerns that use of flags to define platforms may not scale, since real-life use-cases may require more than a list of commands. This commit is a partial revert of #49: the ability to filter platforms remains, but the ability to define platforms directly via JSON is removed. Signed-off-by: John Pennycook --- bin/codebasin | 47 +++++++---------------------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/bin/codebasin b/bin/codebasin index 13ec52d..8190c97 100755 --- a/bin/codebasin +++ b/bin/codebasin @@ -127,10 +127,9 @@ def main(): metavar="", action="append", default=[], - help="Add the specified platform to the analysis. " - + "May be a name or a path to a compilation database. " + help="Include the specified platform in the analysis. " + "May be specified multiple times. " - + "If not specified, all known platforms will be included.", + + "If not specified, all platforms will be included.", ) # The analysis-file argument is optional while we support the -c option. parser.add_argument( @@ -190,33 +189,9 @@ def main(): "Cannot use --config (-c) with TOML analysis files.", ) - # Process the -p flag first to infer wider context. - filtered_platforms = [] - additional_platforms = [] - for p in args.platforms: - # If it's a path, it has to be a compilation database. - if os.path.exists(p): - if not os.path.splitext(p)[1] == ".json": - raise RuntimeError(f"Platform file {p} must end in .json.") - additional_platforms.append(p) - continue - - # Otherwise, treat it as a name in the configuration file. - # Explain the logic above in cases that look suspiciously like paths. - if "/" in p or os.path.splitext(p)[1] == ".json": - logging.getLogger("codebasin").warning( - f"{p} doesn't exist, so will be treated as a name.", - ) - filtered_platforms.append(p) - - # A legacy config file is required if: - # - No additional platforms are specified; and - # - No TOML analysis file is specified + # If no file is specified, legacy behavior checks for config.yaml config_file = args.config_file - config_required = ( - len(additional_platforms) == 0 and args.analysis_file is None - ) - if config_file is None and config_required: + if args.config_file is None and args.analysis_file is None: warnings.warn( "Implicitly defined configuration files are deprecated.", DeprecationWarning, @@ -251,7 +226,7 @@ def main(): config_file, rootdir, exclude_patterns=args.excludes, - filtered_platforms=filtered_platforms, + filtered_platforms=args.platforms, ) # Load the analysis file if it exists. @@ -259,7 +234,7 @@ def main(): path = os.path.realpath(args.analysis_file) if os.path.exists(path): if not os.path.splitext(path)[1] == ".toml": - raise RuntimeError(f"Analysis file {p} must end in .toml.") + raise RuntimeError(f"Analysis file {path} must end in .toml.") with util.safe_open_read_nofollow(path, "rb") as f: try: @@ -273,7 +248,7 @@ def main(): codebase["exclude_patterns"] += excludes for name in analysis_toml["platform"].keys(): - if filtered_platforms and name not in filtered_platforms: + if args.platforms and name not in args.platforms: continue if "commands" not in analysis_toml["platform"][name]: raise ValueError(f"Missing 'commands' for platform {name}") @@ -281,14 +256,6 @@ def main(): db = config.load_database(p, rootdir) configuration.update({name: db}) - # Extend configuration with any additional platforms. - for p in additional_platforms: - name = os.path.splitext(os.path.basename(p))[0] - if name in codebase["platforms"]: - raise RuntimeError(f"Platform name {p} conflicts with {name}.") - db = config.load_database(p, rootdir) - configuration.update({name: db}) - # Parse the source tree, and determine source line associations. # The trees and associations are housed in state. legacy_warnings = True if config_file else False From 4cadabe7ee587f44e0927a985aeba36418ac5b97 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 6 Mar 2024 11:05:04 -0800 Subject: [PATCH 2/5] Throw error if platform passed to -p doesn't exist Signed-off-by: John Pennycook --- bin/codebasin | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bin/codebasin b/bin/codebasin index 8190c97..dbdf96c 100755 --- a/bin/codebasin +++ b/bin/codebasin @@ -247,6 +247,13 @@ def main(): excludes = analysis_toml["codebase"]["exclude"] codebase["exclude_patterns"] += excludes + for name in args.platforms: + if name not in analysis_toml["platform"].keys(): + raise RuntimeError( + f"Platform {name} requested on the command line " + + "does not exist in the configuration file.", + ) + for name in analysis_toml["platform"].keys(): if args.platforms and name not in args.platforms: continue From a09cbe6653ee8f6c9bd4c4ceda3d590abfec2c63 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 6 Mar 2024 11:13:37 -0800 Subject: [PATCH 3/5] Add platforms from TOML file to codebase platforms Signed-off-by: John Pennycook --- bin/codebasin | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/codebasin b/bin/codebasin index dbdf96c..cb05d85 100755 --- a/bin/codebasin +++ b/bin/codebasin @@ -261,6 +261,7 @@ def main(): raise ValueError(f"Missing 'commands' for platform {name}") p = analysis_toml["platform"][name]["commands"] db = config.load_database(p, rootdir) + codebase["platforms"].append(name) configuration.update({name: db}) # Parse the source tree, and determine source line associations. From 2abf3d1081d2c2942860a14049493e8638f466f4 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 6 Mar 2024 10:44:17 -0800 Subject: [PATCH 4/5] Fix dendrogram names Separates legacy behavior (YAML files) and modern behavior (TOML files). When using TOML files, the output name is updated to reflect the subset of platforms that were actually included in the analysis. Signed-off-by: John Pennycook --- bin/codebasin | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/codebasin b/bin/codebasin index cb05d85..fe73aef 100755 --- a/bin/codebasin +++ b/bin/codebasin @@ -306,11 +306,17 @@ def main(): # Print clustering report if report_enabled("clustering"): - if config_file is None: - platform_names = [p[0] for p in args.platforms] - output_prefix = "-".join(platform_names) - else: + # Legacy behavior: guess prefix from YAML filename + if config_file is not None: output_prefix = os.path.realpath(guess_project_name(config_file)) + + # Modern behavior: append platforms to TOML filename + else: + basename = os.path.basename(args.analysis_file) + filename = os.path.splitext(basename)[0] + platform_names = [p for p in codebase["platforms"]] + output_prefix = "-".join([filename] + platform_names) + clustering_output_name = output_prefix + "-dendrogram.png" clustering = report.clustering(clustering_output_name, setmap) if clustering is not None: From ff83d44f7895d4e19a1df4d131162e8fea5ce9d8 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 12 Mar 2024 07:10:27 -0700 Subject: [PATCH 5/5] Use KeyError for missing platform name error Signed-off-by: John Pennycook --- bin/codebasin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/codebasin b/bin/codebasin index fe73aef..216a5c6 100755 --- a/bin/codebasin +++ b/bin/codebasin @@ -249,7 +249,7 @@ def main(): for name in args.platforms: if name not in analysis_toml["platform"].keys(): - raise RuntimeError( + raise KeyError( f"Platform {name} requested on the command line " + "does not exist in the configuration file.", )