Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add MSBuild binary log (.binlog) component detector #1250

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@
<PackageVersion Include="MinVer" Version="5.0.0" />
<PackageVersion Include="Moq" Version="4.18.4" />
<PackageVersion Include="morelinq" Version="4.2.0" />
<PackageVersion Include="MSBuild.StructuredLogger" Version="2.2.317" />
<PackageVersion Include="MSTest.TestFramework" Version="3.5.1" />
<PackageVersion Include="MSTest.Analyzers" Version="3.5.1" />
<PackageVersion Include="MSTest.TestAdapter" Version="3.5.1" />
<PackageVersion Include="Microsoft.Build.Framework" Version="17.5.0" />
<PackageVersion Include="Microsoft.Build.Locator" Version="1.6.1" />
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a concern here. Are you running as a standalone .NET 6 application currently? If so, you won't be able to load MSBuild from new SDKs. Have you tried on a machine that has only .NET 8 installed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to work in the unit tests, which should only have .NET 6 installed (it's installing here and, if I understand that action correctly, it uses global.json to determine what to install, so there is no .NET 8 on the CI machine).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, the problem will be when you run the tool on a different machine which has only .NET 8 installed.

<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
<PackageVersion Include="Newtonsoft.Json" Version="13.0.3" />
<PackageVersion Include="Newtonsoft.Json.Schema" Version="3.0.16" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
<PackageReference Include="Polly" />
<PackageReference Include="SemanticVersioning" />
<PackageReference Include="yamldotnet" />
<PackageReference Include="Microsoft.Build.Framework" ExcludeAssets="Runtime" PrivateAssets="All" />
<PackageReference Include="Microsoft.Build.Locator" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" />
<PackageReference Include="MSBuild.StructuredLogger" />
<PackageReference Include="Newtonsoft.Json" />
<PackageReference Include="System.Reactive" />
<PackageReference Include="System.Threading.Tasks.Dataflow" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
namespace Microsoft.ComponentDetection.Detectors.NuGet;

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using Microsoft.Build.Locator;
using Microsoft.Build.Logging.StructuredLogger;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.Internal;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;

using Task = System.Threading.Tasks.Task;

public class NuGetMSBuildBinaryLogComponentDetector : FileComponentDetector
{
// Item names that are treated as top-level package declarations when AddItem/RemoveItem nodes are processed.
// Lookups are case-insensitive (StringComparer.OrdinalIgnoreCase).
private static readonly HashSet<string> TopLevelPackageItemNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
"PackageReference",
};

// the items listed below represent collection names that NuGet will resolve a package into, along with the metadata value names to get the package name and version
private static readonly Dictionary<string, (string NameMetadata, string VersionMetadata)> ResolvedPackageItemNames = new Dictionary<string, (string, string)>(StringComparer.OrdinalIgnoreCase)
{
["NativeCopyLocalItems"] = ("NuGetPackageId", "NuGetPackageVersion"),
["ResourceCopyLocalItems"] = ("NuGetPackageId", "NuGetPackageVersion"),
["RuntimeCopyLocalItems"] = ("NuGetPackageId", "NuGetPackageVersion"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

["ResolvedAnalyzers"] = ("NuGetPackageId", "NuGetPackageVersion"),
["_PackageDependenciesDesignTime"] = ("Name", "Version"),
};

// Lock object guarding the one-time MSBuild registration performed in OnFileFoundAsync.
private static readonly object MSBuildRegistrationGate = new();
// Set to true after an MSBuild instance has been registered; in this file it is only accessed while holding MSBuildRegistrationGate.
private static bool isMSBuildRegistered;

/// <summary>
/// Initializes a new instance of the <see cref="NuGetMSBuildBinaryLogComponentDetector"/> class.
/// </summary>
/// <param name="walkerFactory">Directory walker factory stored in <c>Scanner</c>.</param>
/// <param name="logger">Logger stored in <c>Logger</c>.</param>
public NuGetMSBuildBinaryLogComponentDetector(
    IObservableDirectoryWalkerFactory walkerFactory,
    ILogger<NuGetMSBuildBinaryLogComponentDetector> logger)
{
    // assigned left to right: Scanner first, then Logger
    (this.Scanner, this.Logger) = (walkerFactory, logger);
}

/// <summary>Identifier of this detector; always "NuGetMSBuildBinaryLog".</summary>
public override string Id { get; } = "NuGetMSBuildBinaryLog";

/// <summary>Categories this detector belongs to: the name of <c>DetectorClass.NuGet</c>. A new array is created on each access.</summary>
public override IEnumerable<string> Categories => new[] { Enum.GetName(typeof(DetectorClass), DetectorClass.NuGet) };

/// <summary>File name patterns handled by this detector: MSBuild binary logs (<c>*.binlog</c>).</summary>
public override IList<string> SearchPatterns { get; } = new List<string> { "*.binlog" };

/// <summary>Component types this detector reports: <c>ComponentType.NuGet</c> only.</summary>
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.NuGet };

/// <summary>Version number of this detector; currently 1.</summary>
public override int Version { get; } = 1;

private static void ProcessResolvedPackageReference(Dictionary<string, HashSet<string>> topLevelDependencies, Dictionary<string, Dictionary<string, string>> projectResolvedDependencies, NamedNode node)
{
var doRemoveOperation = node is RemoveItem;
var doAddOperation = node is AddItem;
if (TopLevelPackageItemNames.Contains(node.Name))
{
var projectEvaluation = node.GetNearestParent<ProjectEvaluation>();
if (projectEvaluation is not null)
{
foreach (var child in node.Children.OfType<Item>())
{
var packageName = child.Name;
if (!topLevelDependencies.TryGetValue(projectEvaluation.ProjectFile, out var topLevel))
{
topLevel = new(StringComparer.OrdinalIgnoreCase);
topLevelDependencies[projectEvaluation.ProjectFile] = topLevel;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one project file may be evaluated and built several times, you'll often see several evaluations per .csproj. Should we pick the best evaluation somehow or is it fine to do for each evaluation? Restore does an eval, then the build does another eval, and each target framework is a separate eval. We probably want a union of all evaluations?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll experiment a bit to see if I can find inaccurate results, but generally we really only care about the item groups that NuGet populates like @(RuntimeCopyLocalItems) and I think that only happens during the Restore evaluation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The restore evaluation is going to be pretty useless (it gets packages downloaded but doesn't produce any of the related items). You will definitely need to do all of the inner-build evaluations so that you get the superset of references, e.g.

<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFrameworks>net8.0;net472</TargetFrameworks>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Newtonsoft.Json" Version="13.0.3"
                      Condition=" '$(TargetFramework)' == 'net8.0' " />
    <PackageReference Include="System.Text.Json" Version="8.0.4"
                      Condition=" '$(TargetFramework)' == 'net472' " />
  </ItemGroup>

</Project>

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed a commit that covers this scenario; check the unit test for some notes, but the end result is that we're scanning the .binlog correctly and after a dotnet build /bl we'd pick up and report both Newtonsoft.Json/13.0.3 and System.Text.Json/8.0.4 and ultimately, that's the end goal: map a .csproj (or rather anything that's not .sln) to a set of package names and versions that came from it, regardless of the TFM that it came from.

}

if (doRemoveOperation)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit worried that you might be getting AddItem/RemoveItem nodes from different evaluations in an interleaved way, and since they are keyed by the project path, the tracking might get confused.

On the other hand it shouldn't happen because the binlog is a tree, and you're walking the tree linearly, so in theory each evaluation should be processed sequentially one after another.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What might be an indication that the traversal is bad? If I try to process RemoveItem but the item isn't already present? Or is that something that MSBuild won't allow?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes I guess, but as I said, maybe I'm just being paranoid

{
topLevel.Remove(packageName);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically I think this is going to cause problems if one TF of a project references a thing and the other gets it removed by framework unification.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have an example of this? I'd like to add a test to make sure we don't remove anything that shouldn't be removed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try this:

<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFrameworks>net472;net8.0</TargetFrameworks>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="System.Text.Json" Version="6.0.0" />
  </ItemGroup>

</Project>

This project DOES use STJ 6.0.0, but ONLY for the net472 TF; it should be removed by conflict resolution against the net8.0 framework in that TF.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, this is a great example. I've updated it locally to track the add/remove operations per project evaluation ID so the evaluation of the project with net8.0 doesn't remove the add operation from net472. I'll work on adding a test for this.

}

Check warning on line 76 in src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs#L74-L76

Added lines #L74 - L76 were not covered by tests

if (doAddOperation)
{
topLevel.Add(packageName);
}
}
}
}
else if (ResolvedPackageItemNames.TryGetValue(node.Name, out var metadataNames))
{
var nameMetadata = metadataNames.NameMetadata;
var versionMetadata = metadataNames.VersionMetadata;
var originalProject = node.GetNearestParent<Project>();
if (originalProject is not null)
{
foreach (var child in node.Children.OfType<Item>())
{
var packageName = GetChildMetadataValue(child, nameMetadata);
var packageVersion = GetChildMetadataValue(child, versionMetadata);
if (packageName is not null && packageVersion is not null)
{
var project = originalProject;
while (project is not null)
{
if (!projectResolvedDependencies.TryGetValue(project.ProjectFile, out var projectDependencies))
{
projectDependencies = new(StringComparer.OrdinalIgnoreCase);
projectResolvedDependencies[project.ProjectFile] = projectDependencies;
}

if (doRemoveOperation)
{
projectDependencies.Remove(packageName);
}

if (doAddOperation)
{
projectDependencies[packageName] = packageVersion;
}

project = project.GetNearestParent<Project>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, why are you walking up the project chain? I don't think these items flow to the calling project?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found I needed this to track transitive dependencies. E.g., if library.csproj has <PackageReference Include="Some.Package" /> and unitTests.csproj has <ProjectReference Include="..\library\library.csproj" /> this will add Some.Package first to library.csproj and then crawl up the chain to unitTests.csproj so that the dependency is properly reported for both projects. The only oddity with this (and I may have to special case it) is the .sln file also appears in a Project node, but I do seem to be getting the proper hierarchy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I understand this. unitTests.csproj should have Some.Package in its assets file and pull assets out of it in its ResolvePackageAssets. Can you share a log or project setup where this is necessary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We explicitly can't use the assets file, because it's not 100% correct, so we have to do it this way. The PR description explains a scenario where the assets file is wrong, but from my manual testing, this will result in a correct reporting of a project and any package that ultimately came from building it. I couldn't think of a scenario where this wasn't the case, but let me know if I missed one, it would make a great test.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm saying that the targets that read the assets file should produce Some.Package in the transitive case, so walking the project graph here doesn't make sense to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...that read the assets file...

That's the problem, the assets file could be wrong, so I need to crawl it manually. The PR description explains a scenario where the assets file isn't correct.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The assets file will still be a superset of what the project might use. You get the "real usage" by looking at the outcome of package resolution / conflict resolution.

If you walk the project graph you end up trying to replay parts of the build. We shouldn't do that when we can just observe what it did.

Also - the assets file isn't "incorrect" here - it is correct from NuGet's perspective. It's just that the build has more policy that it applies to decide if it will actually use a package's contents. If that package contributes assets which are "older" than some other contribution, it will be dropped. The most common case is framework, as you mentioned -- it's what we designed the feature for in the SDK. Technically it could be any conflict though - when any two packages try to provide the same file, the build will compare them to decide whose copy wins.

}
}
}
}
}
}

/// <summary>
/// Finds the first <c>Metadata</c> child of <paramref name="node"/> whose name matches
/// <paramref name="metadataItemName"/> (ordinal, case-insensitive) and returns its value.
/// </summary>
/// <param name="node">Node whose children are searched.</param>
/// <param name="metadataItemName">Name of the metadata entry to look for.</param>
/// <returns>The value of the first matching metadata child, or <c>null</c> when none matches.</returns>
private static string GetChildMetadataValue(TreeNode node, string metadataItemName)
{
    foreach (var candidate in node.Children.OfType<Metadata>())
    {
        if (candidate.Name.Equals(metadataItemName, StringComparison.OrdinalIgnoreCase))
        {
            return candidate.Value;
        }
    }

    // no metadata child with the requested name
    return null;
}

protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
try
{
lock (MSBuildRegistrationGate)
{
if (!isMSBuildRegistered)
brettfo marked this conversation as resolved.
Show resolved Hide resolved
{
// this must happen once per process, and never again
var defaultInstance = MSBuildLocator.QueryVisualStudioInstances().First();
MSBuildLocator.RegisterInstance(defaultInstance);
isMSBuildRegistered = true;
}
}

var singleFileComponentRecorder = this.ComponentRecorder.CreateSingleFileComponentRecorder(processRequest.ComponentStream.Location);
var buildRoot = BinaryLog.ReadBuild(processRequest.ComponentStream.Stream);
this.RecordLockfileVersion(buildRoot.FileFormatVersion);
brettfo marked this conversation as resolved.
Show resolved Hide resolved
this.ProcessBinLog(buildRoot, singleFileComponentRecorder);
}
catch (Exception e)
{

Check warning on line 153 in src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs#L152-L153

Added lines #L152 - L153 were not covered by tests
// If something went wrong, just ignore the package
this.Logger.LogError(e, "Failed to process MSBuild binary log {BinLogFile}", processRequest.ComponentStream.Location);
}

Check warning on line 156 in src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/nuget/NuGetMSBuildBinaryLogComponentDetector.cs#L155-L156

Added lines #L155 - L156 were not covered by tests

return Task.CompletedTask;
}

/// <summary>
/// Called when detection has finished; this detector has nothing to do, so it returns an already-completed task.
/// </summary>
/// <returns>A completed task.</returns>
protected override Task OnDetectionFinishedAsync() => Task.CompletedTask;

/// <summary>
/// Visits every node of the binary log, gathers the packages resolved for each project, and registers one
/// NuGet component per distinct package name/version together with the project files it was resolved for.
/// </summary>
/// <param name="buildRoot">Root of the binary log tree to traverse.</param>
/// <param name="componentRecorder">Recorder that receives the detected components.</param>
private void ProcessBinLog(Build buildRoot, ISingleFileComponentRecorder componentRecorder)
{
    // project path -> top-level package names, and project path -> (package name -> package version)
    var topLevelByProject = new Dictionary<string, HashSet<string>>(StringComparer.OrdinalIgnoreCase);
    var resolvedByProject = new Dictionary<string, Dictionary<string, string>>(StringComparer.OrdinalIgnoreCase);

    // only item add/remove operations are of interest; every other node is ignored
    buildRoot.VisitAllChildren<BaseNode>(visited =>
    {
        if (visited is NamedNode itemOperation && itemOperation is (AddItem or RemoveItem))
        {
            ProcessResolvedPackageReference(topLevelByProject, resolvedByProject, itemOperation);
        }
    });

    // invert the per-project map into "name/version" -> set of project paths
    var projectsByPackage = new Dictionary<string, HashSet<string>>(StringComparer.OrdinalIgnoreCase);
    foreach (var (projectFile, resolvedPackages) in resolvedByProject)
    {
        // solution files are not reported
        var isSolution = Path.GetExtension(projectFile).Equals(".sln", StringComparison.OrdinalIgnoreCase);
        if (isSolution)
        {
            continue;
        }

        foreach (var (name, version) in resolvedPackages)
        {
            var identity = $"{name}/{version}";
            if (!projectsByPackage.TryGetValue(identity, out var owners))
            {
                owners = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
                projectsByPackage[identity] = owners;
            }

            owners.Add(projectFile);
        }
    }

    // register each package in sorted key order, attaching every project it came from
    foreach (var (identity, owners) in projectsByPackage.OrderBy(entry => entry.Key))
    {
        // split at the first '/' only: left is the package name, right is the version
        var nameAndVersion = identity.Split('/', 2);
        var detected = new DetectedComponent(new NuGetComponent(nameAndVersion[0], nameAndVersion[1]));
        foreach (var owner in owners)
        {
            detected.FilePaths.Add(owner);
        }

        componentRecorder.RegisterUsage(detected);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
services.AddSingleton<IComponentDetector, NuGetComponentDetector>();
services.AddSingleton<IComponentDetector, NuGetPackagesConfigDetector>();
services.AddSingleton<IComponentDetector, NuGetProjectModelProjectCentricComponentDetector>();
services.AddSingleton<IComponentDetector, NuGetMSBuildBinaryLogComponentDetector>();

// PIP
services.AddSingleton<IPyPiClient, PyPiClient>();
Expand Down
Loading
Loading