Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Users/oakeredolu/newpipdetector #684

Merged
merged 26 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
3052db7
Added a new client for SimplePypi and fixed disposal in the original …
Omotola Jul 25, 2023
9ec59fe
formatting and refactoring
Omotola Jul 26, 2023
764d003
Change newtonsoft.json to system.text.json and implemented HttpClient…
Omotola Jul 27, 2023
9809cb6
moved httpclient registration to cd servicecollection
Omotola Jul 27, 2023
5682602
Moved simplepypiclienttest out of the test utilities folder
Omotola Jul 28, 2023
bbb1c52
Merge branch 'main' of https://github.com/microsoft/component-detecti…
Omotola Jul 28, 2023
5aea21e
Updated FetchPackageFileStreamAsync to take only the url as an argume…
Omotola Jul 31, 2023
e90fc9e
Merge branch 'users/oakeredolu/newpipclient' of https://github.com/mi…
Omotola Jul 31, 2023
7c86425
Created new PythonResolver for SimplePypiClient
Omotola Jul 31, 2023
8017678
Added new component detector for simple pypi
Omotola Jul 31, 2023
5e8e1a7
added the new pip detector to service collection and updated logger
Omotola Jul 31, 2023
cbfccac
updated logger in new pip detector test
Omotola Jul 31, 2023
6750fdd
Added new pip detector to experiments
Omotola Jul 31, 2023
398b4fc
Added simplepypiclient to service collection
Omotola Jul 31, 2023
6d3b006
Merge branch 'users/oakeredolu/newpipclient' of https://github.com/mi…
Omotola Jul 31, 2023
4a4db0a
Added the new pythonresolver to service collection
Omotola Jul 31, 2023
b74bd3a
Merge branch 'users/oakeredolu/newpipservice' of https://github.com/m…
Omotola Jul 31, 2023
2c5da6a
Merge with main branch
Omotola Aug 1, 2023
314a24a
updated version regex for pre/post releases, updated tests to check m…
Omotola Aug 2, 2023
8fa579e
Updated detector id and version
Omotola Aug 2, 2023
9fcc12d
typo fix
Omotola Aug 2, 2023
9dd3a5e
Merge with main
Omotola Aug 2, 2023
dc4b322
refactoring and error logging
Omotola Aug 2, 2023
86dd700
Merge branch 'main' of https://github.com/microsoft/component-detecti…
Omotola Aug 3, 2023
fb5f14d
Made the simplepip detector defaultOff
Omotola Aug 4, 2023
a7d1dd6
removed iexperimentaldetector
Omotola Aug 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;

using System;
using System.Collections.Generic;
using System.Linq;
using System.Reactive.Linq;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.Internal;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;

/// <summary>
/// File component detector (id <c>SimplePip</c>) that inspects <c>setup.py</c> and
/// <c>requirements.txt</c> files, resolves the listed packages through an
/// <see cref="ISimplePythonResolver"/> and records the resulting pip dependency graph.
/// Implements <see cref="IDefaultOffComponentDetector"/>.
/// </summary>
public class SimplePipComponentDetector : FileComponentDetector, IDefaultOffComponentDetector
{
    // Detector argument key holding an optional path to the python executable.
    private const string PythonExePathArg = "Pip.PythonExePath";

    private readonly IPythonCommandService pythonCommandService;
    private readonly ISimplePythonResolver pythonResolver;

    /// <summary>
    /// Initializes a new instance of the <see cref="SimplePipComponentDetector"/> class.
    /// </summary>
    /// <param name="componentStreamEnumerableFactory">Assigned to <c>ComponentStreamEnumerableFactory</c>.</param>
    /// <param name="walkerFactory">Assigned to <c>Scanner</c>.</param>
    /// <param name="pythonCommandService">Service used to check for python and to parse the found files.</param>
    /// <param name="pythonResolver">Resolver that turns the listed packages into a graph of root nodes.</param>
    /// <param name="logger">Assigned to <c>Logger</c>.</param>
    public SimplePipComponentDetector(
        IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
        IObservableDirectoryWalkerFactory walkerFactory,
        IPythonCommandService pythonCommandService,
        ISimplePythonResolver pythonResolver,
        ILogger<SimplePipComponentDetector> logger)
    {
        this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
        this.Scanner = walkerFactory;
        this.pythonCommandService = pythonCommandService;
        this.pythonResolver = pythonResolver;
        this.Logger = logger;
    }

    /// <inheritdoc />
    public override string Id => "SimplePip";

    /// <inheritdoc />
    public override IList<string> SearchPatterns => new List<string> { "setup.py", "requirements.txt" };

    /// <inheritdoc />
    public override IEnumerable<string> Categories => new List<string> { "Python" };

    /// <inheritdoc />
    public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

    /// <inheritdoc />
    public override int Version { get; } = 1;

    /// <summary>
    /// Checks whether python is available before any file is processed. When
    /// <see cref="IPythonCommandService.PythonExistsAsync"/> reports that it is not, an empty
    /// observable is returned so that no file reaches <see cref="OnFileFoundAsync"/>.
    /// </summary>
    /// <param name="processRequests">The process requests that would be handled by this detector.</param>
    /// <param name="detectorArgs">Detector arguments (not read here; see note in the body).</param>
    /// <returns><paramref name="processRequests"/> unchanged when python exists, otherwise an empty observable.</returns>
    protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
    {
        // NOTE(review): the path is read from CurrentScanRequest.DetectorArgs rather than the
        // detectorArgs parameter - presumably both carry the same values; confirm.
        // When the key is absent pythonExePath stays null and is passed on as-is.
        this.CurrentScanRequest.DetectorArgs.TryGetValue(PythonExePathArg, out var pythonExePath);
        if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
        {
            this.Logger.LogInformation("No python found on system. Python detection will not run.");

            return Enumerable.Empty<ProcessRequest>().ToObservable();
        }

        return processRequests;
    }

    /// <summary>
    /// Parses a single matched file, resolves its listed packages into a dependency graph and
    /// records the result. Any exception raised while doing so is logged and not rethrown.
    /// </summary>
    /// <param name="processRequest">Supplies the component stream (file) and the recorder for that file.</param>
    /// <param name="detectorArgs">Detector arguments (not read here; the scan request's arguments are used).</param>
    /// <returns>A task that completes when the file has been processed.</returns>
    protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs)
    {
        this.CurrentScanRequest.DetectorArgs.TryGetValue(PythonExePathArg, out var pythonExePath);
        var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
        var file = processRequest.ComponentStream;

        try
        {
            var initialPackages = await this.pythonCommandService.ParseFileAsync(file.Location, pythonExePath);

            // Entries carrying a non-blank package string become dependency specifications;
            // specifications flagged by PackageIsUnsafe() are dropped before resolution.
            var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null)
                .Select(tuple => tuple.PackageString)
                .Where(x => !string.IsNullOrWhiteSpace(x))
                .Select(x => new PipDependencySpecification(x))
                .Where(x => !x.PackageIsUnsafe())
                .ToList();

            var roots = await this.pythonResolver.ResolveRootsAsync(singleFileComponentRecorder, listedPackage);

            RecordComponents(
                singleFileComponentRecorder,
                roots);

            // Entries that carry a ready-made component are registered directly as explicit references.
            initialPackages.Where(tuple => tuple.Component != null)
                .Select(tuple => new DetectedComponent(tuple.Component))
                .ToList()
                .ForEach(gitComponent => singleFileComponentRecorder.RegisterUsage(gitComponent, isExplicitReferencedDependency: true));
        }
        catch (Exception e)
        {
            this.Logger.LogError(e, "Error while parsing pip components in {File}", file.Location);
        }
    }

    /// <summary>
    /// Registers the resolved graph with the recorder. Every root is registered as an explicitly
    /// referenced dependency; the remaining nodes are visited through a FIFO queue and registered
    /// with the id of the component they were reached from.
    /// </summary>
    /// <param name="recorder">Recorder that receives the components.</param>
    /// <param name="roots">Root nodes of the resolved graph.</param>
    private static void RecordComponents(
        ISingleFileComponentRecorder recorder,
        IList<PipGraphNode> roots)
    {
        var nonRoots = new Queue<(DetectedComponent, PipGraphNode)>();

        foreach (var root in roots)
        {
            var rootDetectedComponent = new DetectedComponent(root.Value);

            recorder.RegisterUsage(
                rootDetectedComponent,
                isExplicitReferencedDependency: true);

            foreach (var child in root.Children)
            {
                nonRoots.Enqueue((rootDetectedComponent, child));
            }
        }

        // Ids of non-root components whose children have already been queued.
        var registeredIds = new HashSet<string>();

        while (nonRoots.Count > 0)
        {
            var (parent, item) = nonRoots.Dequeue();

            var detectedComponent = new DetectedComponent(item.Value);

            // The parent/child edge is always registered, even for an id seen before.
            recorder.RegisterUsage(
                detectedComponent,
                parentComponentId: parent.Component.Id);

            // Add returns false for an id that is already present, so each component's
            // children are expanded only once and cyclic graphs cannot loop forever.
            if (registeredIds.Add(detectedComponent.Component.Id))
            {
                foreach (var child in item.Children)
                {
                    nonRoots.Enqueue((detectedComponent, child));
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;

public class SimplePythonResolver : ISimplePythonResolver
{
private static readonly Regex VersionRegex = new(@"-(\d+\.\d+(\.\d)*)(.tar|-)", RegexOptions.Compiled);
private static readonly Regex VersionRegex = new(@"-(\d+(\.)\w+((\+|\.)\w*)*)(.tar|-)", RegexOptions.Compiled);

private readonly ISimplePyPiClient simplePypiClient;
private readonly ILogger<SimplePythonResolver> logger;
Expand Down Expand Up @@ -100,10 +100,18 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec

var simplePythonProject = await this.simplePypiClient.GetSimplePypiProjectAsync(rootPackage);

if (simplePythonProject != null && simplePythonProject.Files.Any())
if (simplePythonProject == null || !simplePythonProject.Files.Any())
{
var pythonProject = this.ConvertSimplePypiProjectToSortedDictionary(simplePythonProject, rootPackage);
this.logger.LogWarning(
"Root dependency {RootPackageName} not found on pypi. Skipping package.",
rootPackage.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name);
}

var pythonProject = this.ConvertSimplePypiProjectToSortedDictionary(simplePythonProject, rootPackage);

if (pythonProject.Keys.Any())
{
state.ValidVersionMap[rootPackage.Name] = pythonProject;

// Grab the latest version as our candidate version
Expand All @@ -121,7 +129,7 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
else
{
this.logger.LogWarning(
"Root dependency {RootPackageName} not found on pypi. Skipping package.",
"Unable to resolve package: {RootPackageName} gotten from pypi possibly due to invalid versions. Skipping package.",
rootPackage.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name);
}
Expand Down Expand Up @@ -169,9 +177,17 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
// We haven't encountered this package before, so let's fetch it and find a candidate
var newProject = await this.simplePypiClient.GetSimplePypiProjectAsync(dependencyNode);

if (newProject != null && newProject.Files.Any())
if (newProject == null || !newProject.Files.Any())
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
dependencyNode.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}

var result = this.ConvertSimplePypiProjectToSortedDictionary(newProject, dependencyNode);
if (result.Keys.Any())
{
var result = this.ConvertSimplePypiProjectToSortedDictionary(newProject, dependencyNode);
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;
Expand All @@ -183,7 +199,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
else
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
"Unable to resolve dependency package {DependencyName} gotten from pypi possibly due to invalid versions. Skipping package",
dependencyNode.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
Expand All @@ -202,7 +218,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
/// <returns> Returns a SortedDictionary of PythonProjectReleases. </returns>
private SortedDictionary<string, IList<PythonProjectRelease>> ConvertSimplePypiProjectToSortedDictionary(SimplePypiProject simplePypiProject, PipDependencySpecification spec)
{
var sortedProjectVersions = new SortedDictionary<string, IList<PythonProjectRelease>>();
var sortedProjectVersions = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer());
foreach (var file in simplePypiProject.Files)
{
try
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace Microsoft.ComponentDetection.Orchestrator.Experiments.Configs;

using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Detectors.Pip;

/// <summary>
/// Experiment named "NewPipDetector" that uses <see cref="PipComponentDetector"/> as the control
/// group and <see cref="SimplePipComponentDetector"/> as the experiment group.
/// </summary>
public class SimplePipExperiment : IExperimentConfiguration
{
    /// <inheritdoc />
    public string Name
    {
        get { return "NewPipDetector"; }
    }

    /// <inheritdoc />
    public bool IsInControlGroup(IComponentDetector componentDetector)
    {
        // Control group: the existing pip detector.
        return componentDetector is PipComponentDetector;
    }

    /// <inheritdoc />
    public bool IsInExperimentGroup(IComponentDetector componentDetector)
    {
        // Experiment group: the detector under validation.
        return componentDetector is SimplePipComponentDetector;
    }

    /// <inheritdoc />
    public bool ShouldRecord(IComponentDetector componentDetector, int numComponents)
    {
        // Always record, regardless of detector or component count.
        return true;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
services.AddSingleton<IExperimentProcessor, DefaultExperimentProcessor>();
services.AddSingleton<IExperimentConfiguration, NewNugetExperiment>();
services.AddSingleton<IExperimentConfiguration, NpmLockfile3Experiment>();
services.AddSingleton<IExperimentConfiguration, SimplePipExperiment>();

// Detectors
// CocoaPods
Expand Down Expand Up @@ -118,6 +119,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
services.AddSingleton<IPythonResolver, PythonResolver>();
services.AddSingleton<ISimplePythonResolver, SimplePythonResolver>();
services.AddSingleton<IComponentDetector, PipComponentDetector>();
services.AddSingleton<IComponentDetector, SimplePipComponentDetector>();

// pnpm
services.AddSingleton<IComponentDetector, PnpmComponentDetector>();
Expand Down
Loading
Loading