-
Notifications
You must be signed in to change notification settings - Fork 91
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add MSBuild binary log (.binlog) component detector #1250
base: main
Are you sure you want to change the base?
Changes from 4 commits
135fdd2
8b4aa9c
910caa4
eee567f
b837087
aa9d767
ffc2ece
d4ddd71
3ff2a8f
0a38a99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
namespace Microsoft.ComponentDetection.Detectors.NuGet; | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Threading; | ||
using Microsoft.Build.Locator; | ||
using Microsoft.Build.Logging.StructuredLogger; | ||
using Microsoft.ComponentDetection.Contracts; | ||
using Microsoft.ComponentDetection.Contracts.Internal; | ||
using Microsoft.ComponentDetection.Contracts.TypedComponent; | ||
using Microsoft.Extensions.Logging; | ||
|
||
using Task = System.Threading.Tasks.Task; | ||
|
||
public class NuGetMSBuildBinaryLogComponentDetector : FileComponentDetector | ||
{ | ||
private static readonly HashSet<string> TopLevelPackageItemNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase) | ||
{ | ||
"PackageReference", | ||
}; | ||
|
||
// the items listed below represent collection names that NuGet will resolve a package into, along with the metadata value names to get the package name and version | ||
private static readonly Dictionary<string, (string NameMetadata, string VersionMetadata)> ResolvedPackageItemNames = new Dictionary<string, (string, string)>(StringComparer.OrdinalIgnoreCase) | ||
{ | ||
["NativeCopyLocalItems"] = ("NuGetPackageId", "NuGetPackageVersion"), | ||
["ResourceCopyLocalItems"] = ("NuGetPackageId", "NuGetPackageVersion"), | ||
["RuntimeCopyLocalItems"] = ("NuGetPackageId", "NuGetPackageVersion"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will you read the value of these late enough in the build to understand that they haven't been reduced by ConflictResolution? |
||
["ResolvedAnalyzers"] = ("NuGetPackageId", "NuGetPackageVersion"), | ||
["_PackageDependenciesDesignTime"] = ("Name", "Version"), | ||
}; | ||
|
||
private static readonly object MSBuildRegistrationGate = new(); | ||
private static bool isMSBuildRegistered; | ||
|
||
public NuGetMSBuildBinaryLogComponentDetector( | ||
IObservableDirectoryWalkerFactory walkerFactory, | ||
ILogger<NuGetMSBuildBinaryLogComponentDetector> logger) | ||
{ | ||
this.Scanner = walkerFactory; | ||
this.Logger = logger; | ||
} | ||
|
||
public override string Id { get; } = "NuGetMSBuildBinaryLog"; | ||
|
||
public override IEnumerable<string> Categories => new[] { Enum.GetName(typeof(DetectorClass), DetectorClass.NuGet) }; | ||
|
||
public override IList<string> SearchPatterns { get; } = new List<string> { "*.binlog" }; | ||
|
||
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.NuGet }; | ||
|
||
public override int Version { get; } = 1; | ||
|
||
private static void ProcessResolvedPackageReference(Dictionary<string, HashSet<string>> topLevelDependencies, Dictionary<string, Dictionary<string, string>> projectResolvedDependencies, NamedNode node) | ||
{ | ||
var doRemoveOperation = node is RemoveItem; | ||
var doAddOperation = node is AddItem; | ||
if (TopLevelPackageItemNames.Contains(node.Name)) | ||
{ | ||
var projectEvaluation = node.GetNearestParent<ProjectEvaluation>(); | ||
if (projectEvaluation is not null) | ||
{ | ||
foreach (var child in node.Children.OfType<Item>()) | ||
{ | ||
var packageName = child.Name; | ||
if (!topLevelDependencies.TryGetValue(projectEvaluation.ProjectFile, out var topLevel)) | ||
{ | ||
topLevel = new(StringComparer.OrdinalIgnoreCase); | ||
topLevelDependencies[projectEvaluation.ProjectFile] = topLevel; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. one project file may be evaluated and built several times, you'll often see several evaluations per .csproj. Should we pick the best evaluation somehow or is it fine to do for each evaluation? Restore does an eval, then the build does another eval, and each target framework is a separate eval. We probably want a union of all evaluations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll experiment a bit to see if I can find inaccurate results, but generally we really only care about the item groups that NuGet populates like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The restore evaluation is going to be pretty useless (it gets packages downloaded but doesn't produce any of the related items). You will definitely need to do all of the inner-build evaluations so that you get the superset of references, e.g. <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net8.0;net472</TargetFrameworks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Newtonsoft.Json" Version="13.0.3"
Condition=" '$(TargetFramework)' == 'net8.0' " />
<PackageReference Include="System.Text.Json" Version="8.0.4"
Condition=" '$(TargetFramework)' == 'net472' " />
</ItemGroup>
</Project> There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just pushed a commit that covers this scenario; check the unit test for some notes, but the end result is that we're scanning the |
||
} | ||
|
||
if (doRemoveOperation) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a bit worried that you might be getting additem/removeitem from different evaluations in an interleaved way, and since they key by the project path this might get confused. On the other hand it shouldn't happen because the binlog is a tree, and you're walking the tree linearly, so in theory each evaluation should be processed sequentially one after another. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What might be an indication that the traversal is bad? If I try to process There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes I guess, but as I said, maybe I'm just being paranoid |
||
{ | ||
topLevel.Remove(packageName); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Specifically I think this is going to cause problems if one TF of a project references a thing and the other gets it removed by framework unification. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you have an example of this? I'd like to add a test to make sure we don't remove anything that shouldn't be removed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Try this: <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net472;net8.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Text.Json" Version="6.0.0" />
</ItemGroup>
</Project> This project DOES use STJ 6.0.0, but ONLY for the net472 TF; it should be removed by conflict resolution against the net8.0 framework in that TF. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you, this is a great example. I've updated it locally to track the add/remove operations per project evaluation ID so the evaluation of the project with |
||
} | ||
Check warning on line 76 in src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs Codecov / codecov/patchsrc/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs#L74-L76
|
||
|
||
if (doAddOperation) | ||
{ | ||
topLevel.Add(packageName); | ||
} | ||
} | ||
} | ||
} | ||
else if (ResolvedPackageItemNames.TryGetValue(node.Name, out var metadataNames)) | ||
{ | ||
var nameMetadata = metadataNames.NameMetadata; | ||
var versionMetadata = metadataNames.VersionMetadata; | ||
var originalProject = node.GetNearestParent<Project>(); | ||
if (originalProject is not null) | ||
{ | ||
foreach (var child in node.Children.OfType<Item>()) | ||
{ | ||
var packageName = GetChildMetadataValue(child, nameMetadata); | ||
var packageVersion = GetChildMetadataValue(child, versionMetadata); | ||
if (packageName is not null && packageVersion is not null) | ||
{ | ||
var project = originalProject; | ||
while (project is not null) | ||
{ | ||
if (!projectResolvedDependencies.TryGetValue(project.ProjectFile, out var projectDependencies)) | ||
{ | ||
projectDependencies = new(StringComparer.OrdinalIgnoreCase); | ||
projectResolvedDependencies[project.ProjectFile] = projectDependencies; | ||
} | ||
|
||
if (doRemoveOperation) | ||
{ | ||
projectDependencies.Remove(packageName); | ||
} | ||
|
||
if (doAddOperation) | ||
{ | ||
projectDependencies[packageName] = packageVersion; | ||
} | ||
|
||
project = project.GetNearestParent<Project>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, why are you walking up the project chain? I don't think these items flow to the calling project? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found I needed this to track transitive dependencies. E.g., if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think I understand this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We explicitly can't use the assets file, because it's not 100% correct, so we have to do it this way. The PR description explains a scenario where the assets file is wrong, but from my manual testing, this will result in a correct reporting of a project and any package that ultimately came from building it. I couldn't think of a scenario where this wasn't the case, but let me know if I missed one, it would make a great test. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm saying that the targets that read the assets file should produce There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's the problem, the assets file could be wrong, so I need to crawl it manually. The PR description explains a scenario where the assets file isn't correct. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The assets file will still be a superset of what the project might use. You get the "real usage" by looking at the outcome of package resolution / conflict resolution. If you walk the project graph you end up trying to replay parts of the build. We shouldn't do that when we can just observe what it did. Also - the assets file isn't "Incorrect" here - it is correct from NuGet's perspective. It's just that the build has more policy that it applies to decide if it will actually use a package's contents. If that package contributes assets which are "older" than some other contribution it will be dropped. The most common case is framework, as you mentioned -- it's what we designed the feature for in the SDK. Technically it could be any conflict though - when any two packages try to provide the same file the build will compare them to decide who's copy wins. |
||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
private static string GetChildMetadataValue(TreeNode node, string metadataItemName) | ||
{ | ||
var metadata = node.Children.OfType<Metadata>(); | ||
var metadataValue = metadata.FirstOrDefault(m => m.Name.Equals(metadataItemName, StringComparison.OrdinalIgnoreCase))?.Value; | ||
return metadataValue; | ||
} | ||
|
||
protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default) | ||
{ | ||
try | ||
{ | ||
lock (MSBuildRegistrationGate) | ||
{ | ||
if (!isMSBuildRegistered) | ||
brettfo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
// this must happen once per process, and never again | ||
var defaultInstance = MSBuildLocator.QueryVisualStudioInstances().First(); | ||
MSBuildLocator.RegisterInstance(defaultInstance); | ||
isMSBuildRegistered = true; | ||
} | ||
} | ||
|
||
var singleFileComponentRecorder = this.ComponentRecorder.CreateSingleFileComponentRecorder(processRequest.ComponentStream.Location); | ||
var buildRoot = BinaryLog.ReadBuild(processRequest.ComponentStream.Stream); | ||
this.RecordLockfileVersion(buildRoot.FileFormatVersion); | ||
brettfo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
this.ProcessBinLog(buildRoot, singleFileComponentRecorder); | ||
} | ||
catch (Exception e) | ||
{ | ||
Check warning on line 153 in src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs Codecov / codecov/patchsrc/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs#L152-L153
|
||
// If something went wrong, just ignore the package | ||
this.Logger.LogError(e, "Failed to process MSBuild binary log {BinLogFile}", processRequest.ComponentStream.Location); | ||
} | ||
Check warning on line 156 in src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs Codecov / codecov/patchsrc/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs#L155-L156
|
||
|
||
return Task.CompletedTask; | ||
} | ||
|
||
protected override Task OnDetectionFinishedAsync() | ||
{ | ||
return Task.CompletedTask; | ||
} | ||
|
||
private void ProcessBinLog(Build buildRoot, ISingleFileComponentRecorder componentRecorder) | ||
{ | ||
// maps a project path to a set of resolved dependencies | ||
var projectTopLevelDependencies = new Dictionary<string, HashSet<string>>(StringComparer.OrdinalIgnoreCase); | ||
var projectResolvedDependencies = new Dictionary<string, Dictionary<string, string>>(StringComparer.OrdinalIgnoreCase); | ||
buildRoot.VisitAllChildren<BaseNode>(node => | ||
{ | ||
switch (node) | ||
{ | ||
case NamedNode namedNode when namedNode is AddItem or RemoveItem: | ||
ProcessResolvedPackageReference(projectTopLevelDependencies, projectResolvedDependencies, namedNode); | ||
break; | ||
default: | ||
break; | ||
} | ||
}); | ||
|
||
// dependencies were resolved per project, we need to re-arrange them to be per package/version | ||
var projectsPerPackage = new Dictionary<string, HashSet<string>>(StringComparer.OrdinalIgnoreCase); | ||
foreach (var projectPath in projectResolvedDependencies.Keys) | ||
{ | ||
if (Path.GetExtension(projectPath).Equals(".sln", StringComparison.OrdinalIgnoreCase)) | ||
{ | ||
// don't report solution files | ||
continue; | ||
} | ||
|
||
var projectDependencies = projectResolvedDependencies[projectPath]; | ||
foreach (var (packageName, packageVersion) in projectDependencies) | ||
{ | ||
var key = $"{packageName}/{packageVersion}"; | ||
if (!projectsPerPackage.TryGetValue(key, out var projectPaths)) | ||
{ | ||
projectPaths = new HashSet<string>(StringComparer.OrdinalIgnoreCase); | ||
projectsPerPackage[key] = projectPaths; | ||
} | ||
|
||
projectPaths.Add(projectPath); | ||
} | ||
} | ||
|
||
// report it all | ||
foreach (var packageNameAndVersion in projectsPerPackage.Keys.OrderBy(p => p)) | ||
{ | ||
var projectPaths = projectsPerPackage[packageNameAndVersion]; | ||
var parts = packageNameAndVersion.Split('/', 2); | ||
var packageName = parts[0]; | ||
var packageVersion = parts[1]; | ||
var component = new NuGetComponent(packageName, packageVersion); | ||
var libraryComponent = new DetectedComponent(component); | ||
foreach (var projectPath in projectPaths) | ||
{ | ||
libraryComponent.FilePaths.Add(projectPath); | ||
} | ||
|
||
componentRecorder.RegisterUsage(libraryComponent); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a concern here. Are you running as a standalone .NET 6 application currently? If so, you won't be able to load MSBuild from new SDKs. Have you tried on a machine that has only .NET 8 installed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to work in the unit tests which should only have .NET6 installed (it's installing here and if I understand that action correctly, it uses
global.json
to determine what to install, so no .NET 8 on the CI machine)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, the problem will be when you run the tool on a different machine which has only .NET 8 installed.