diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs index 062218080..6a74abb90 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePythonResolver.cs @@ -14,15 +14,76 @@ namespace Microsoft.ComponentDetection.Detectors.Pip; public class SimplePythonResolver : ISimplePythonResolver { + private static readonly Regex VersionRegex = new(@"-(\d+\.\d+(\.\d)*)(.tar|-)", RegexOptions.Compiled); + private readonly ISimplePyPiClient simplePypiClient; - private readonly ILogger logger; + private readonly ILogger logger; - public SimplePythonResolver(ISimplePyPiClient simplePypiClient, ILogger logger) + /// + /// Initializes a new instance of the class. + /// + /// The simple PyPi client. + /// The logger. + public SimplePythonResolver(ISimplePyPiClient simplePypiClient, ILogger logger) { this.simplePypiClient = simplePypiClient; this.logger = logger; } + /// + /// Uses regex to extract the version from the file name. + /// + /// the name of the file from simple pypi. + /// returns a string representing the release version. + private static string GetVersionFromFileName(string fileName) + { + var version = VersionRegex.Match(fileName).Groups[1]; + return version.Value; + } + + /// + /// Returns the package type based on the file name. + /// + /// the name of the file from simple pypi. + /// a string representing the package type. + private static string GetPackageType(string fileName) + { + if (fileName.EndsWith(".whl")) + { + return "bdist_wheel"; + } + + if (fileName.EndsWith(".tar.gz")) + { + return "sdist"; + } + + return fileName.EndsWith(".egg") ? "bdist_egg" : string.Empty; + } + + /// + /// Adds a node to the graph. + /// + /// The PythonResolverState. + /// The parent node. + /// The package name. + /// The package version. + private static void AddGraphNode(PythonResolverState state, PipGraphNode parent, string name, string version) + { + if (state.NodeReferences.TryGetValue(name, out var value)) + { + parent.Children.Add(value); + value.Parents.Add(parent); + } + else + { + var node = new PipGraphNode(new PipComponent(name, version)); + state.NodeReferences[name] = node; + parent.Children.Add(node); + node.Parents.Add(parent); + } + } + /// public async Task> ResolveRootsAsync(ISingleFileComponentRecorder singleFileComponentRecorder, IList initialPackages) { @@ -32,35 +93,37 @@ public async Task> ResolveRootsAsync(ISingleFileComponentRec foreach (var rootPackage in initialPackages) { // If we have it, we probably just want to skip at this phase as this indicates duplicates - if (!state.ValidVersionMap.TryGetValue(rootPackage.Name, out _)) + if (state.ValidVersionMap.TryGetValue(rootPackage.Name, out _)) { - var simplePythonProject = await this.simplePypiClient.GetSimplePypiProjectAsync(rootPackage); + continue; + } - if (simplePythonProject != null && simplePythonProject.Files.Any()) - { - var pythonProject = this.ConvertSimplePypiProjectToSortedDictionary(simplePythonProject, rootPackage); + var simplePythonProject = await this.simplePypiClient.GetSimplePypiProjectAsync(rootPackage); - state.ValidVersionMap[rootPackage.Name] = pythonProject; + if (simplePythonProject != null && simplePythonProject.Files.Any()) + { + var pythonProject = this.ConvertSimplePypiProjectToSortedDictionary(simplePythonProject, rootPackage); - // Grab the latest version as our candidate version - var candidateVersion = state.ValidVersionMap[rootPackage.Name].Keys.Any() - ? state.ValidVersionMap[rootPackage.Name].Keys.Last() : null; + state.ValidVersionMap[rootPackage.Name] = pythonProject; - var node = new PipGraphNode(new PipComponent(rootPackage.Name, candidateVersion)); + // Grab the latest version as our candidate version + var candidateVersion = state.ValidVersionMap[rootPackage.Name].Keys.Any() + ? state.ValidVersionMap[rootPackage.Name].Keys.Last() : null; - state.NodeReferences[rootPackage.Name] = node; + var node = new PipGraphNode(new PipComponent(rootPackage.Name, candidateVersion)); - state.Roots.Add(node); + state.NodeReferences[rootPackage.Name] = node; - state.ProcessingQueue.Enqueue((rootPackage.Name, rootPackage)); - } - else - { - this.logger.LogWarning( - "Root dependency {RootPackageName} not found on pypi. Skipping package.", - rootPackage.Name); - singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name); - } + state.Roots.Add(node); + + state.ProcessingQueue.Enqueue((rootPackage.Name, rootPackage)); + } + else + { + this.logger.LogWarning( + "Root dependency {RootPackageName} not found on pypi. Skipping package.", + rootPackage.Name); + singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name); } } @@ -113,7 +176,7 @@ private async Task> ProcessQueueAsync(ISingleFileComponentRe var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any() ? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null; - this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion); + AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion); state.ProcessingQueue.Enqueue((root, dependencyNode)); } @@ -144,20 +207,22 @@ private SortedDictionary> ConvertSimplePypiP { try { - var packageType = this.GetPackageType(file.FileName); - var version = this.GetVersionFromFileName(file.FileName); + var packageType = GetPackageType(file.FileName); + var version = GetVersionFromFileName(file.FileName); var parsedVersion = PythonVersion.Create(version); - if (parsedVersion.Valid && parsedVersion.IsReleasedPackage && - PythonVersionUtilities.VersionValidForSpec(version, spec.DependencySpecifiers)) + if (!parsedVersion.Valid || !parsedVersion.IsReleasedPackage || + !PythonVersionUtilities.VersionValidForSpec(version, spec.DependencySpecifiers)) { - var pythonProjectRelease = new PythonProjectRelease() { PythonVersion = version, PackageType = packageType, Size = file.Size, Url = file.Url }; - if (!sortedProjectVersions.ContainsKey(version)) - { - sortedProjectVersions.Add(version, new List()); - } + continue; + } - sortedProjectVersions[version].Add(pythonProjectRelease); + var pythonProjectRelease = new PythonProjectRelease { PythonVersion = version, PackageType = packageType, Size = file.Size, Url = file.Url }; + if (!sortedProjectVersions.ContainsKey(version)) + { + sortedProjectVersions.Add(version, new List()); } + + sortedProjectVersions[version].Add(pythonProjectRelease); } catch (ArgumentException ae) { @@ -166,49 +231,12 @@ private SortedDictionary> ConvertSimplePypiP "Release {Release} could not be added to the sorted list of pip components for spec={SpecName}. Usually this happens with unexpected PyPi version formats (e.g. prerelease/dev versions).", JsonConvert.SerializeObject(file), spec.Name); - continue; } } return sortedProjectVersions; } - /// - /// Returns the package type based on the file name. - /// - /// the name of the file from simple pypi. - /// a string representing the package type. - private string GetPackageType(string fileName) - { - if (fileName.EndsWith(".whl")) - { - return "bdist_wheel"; - } - else if (fileName.EndsWith(".tar.gz")) - { - return "sdist"; - } - else if (fileName.EndsWith(".egg")) - { - return "bdist_egg"; - } - else - { - return string.Empty; - } - } - - /// - /// Uses regex to extract the version from the file name. - /// - /// the name of the file from simple pypi. - /// returns a string representing the release version. - private string GetVersionFromFileName(string fileName) - { - var version = Regex.Match(fileName, @"-(\d+\.\d+(\.\d)*)(.tar|-)").Groups[1]; - return version.Value; - } - /// /// Fetches the dependencies for a package. /// @@ -248,7 +276,7 @@ private async Task> FetchPackageDependenciesAs private async Task> FetchDependenciesFromPackageStreamAsync(string name, string version, Stream packageStream) { var dependencies = new List(); - var package = new ZipArchive(packageStream); + using var package = new ZipArchive(packageStream); var entry = package.GetEntry($"{name.Replace('-', '_')}-{version}.dist-info/METADATA"); @@ -259,7 +287,7 @@ private async Task> FetchDependenciesFromPacka } var content = new List(); - using (var stream = entry.Open()) + await using (var stream = entry.Open()) { using var streamReader = new StreamReader(stream); @@ -277,10 +305,7 @@ private async Task> FetchDependenciesFromPacka // Pull the packages that aren't conditional based on "extras" // Right now we just want to resolve the graph as most comsumers will // experience it - foreach (var deps in content.Where(x => !x.Contains("extra =="))) - { - dependencies.Add(new PipDependencySpecification(deps, true)); - } + dependencies.AddRange(content.Where(x => !x.Contains("extra ==")).Select(deps => new PipDependencySpecification(deps, true))); return dependencies; } @@ -302,12 +327,9 @@ private async Task InvalidateAndReprocessAsync( var oldVersions = state.ValidVersionMap[pipComponent.Name].Keys.ToList(); var currentSelectedVersion = node.Value.Version; var currentReleases = state.ValidVersionMap[pipComponent.Name][currentSelectedVersion]; - foreach (var version in oldVersions) + foreach (var version in oldVersions.Where(version => !PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers))) { - if (!PythonVersionUtilities.VersionValidForSpec(version, newSpec.DependencySpecifiers)) - { - state.ValidVersionMap[pipComponent.Name].Remove(version); - } + state.ValidVersionMap[pipComponent.Name].Remove(version); } if (state.ValidVersionMap[pipComponent.Name].Count == 0) @@ -347,27 +369,4 @@ private async Task InvalidateAndReprocessAsync( return true; } - - /// - /// Adds a node to the graph. - /// - /// The PythonResolverState. - /// The parent node. - /// The package name. - /// The package version. - private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string name, string version) - { - if (state.NodeReferences.TryGetValue(name, out var value)) - { - parent.Children.Add(value); - value.Parents.Add(parent); - } - else - { - var node = new PipGraphNode(new PipComponent(name, version)); - state.NodeReferences[name] = node; - parent.Children.Add(node); - node.Parents.Add(parent); - } - } }