From edf0c8dc6eadf2059b6f9f8e50a460c5749679ba Mon Sep 17 00:00:00 2001 From: Paul Dorsch <107068277+pauld-msft@users.noreply.github.com> Date: Mon, 19 Aug 2024 10:28:52 -0400 Subject: [PATCH] Fix bug where pipreport used index-urls from requirements.txt (#1227) * fix bug where pipreport used index urls in requirements.txt * update tests * docs * add --no-input to pip install, so we do not hang waiting for user input * pr feedback: performance and cleanup * bump version --- docs/detectors/pip.md | 4 + .../FileUtilityService.cs | 32 ++++ .../FileComponentDetector.cs | 5 +- .../IFileUtilityService.cs | 8 + .../pip/PipCommandService.cs | 139 ++++++++++-------- .../pip/PipReportComponentDetector.cs | 2 +- .../FileUtilityServiceTests.cs | 55 +++++++ ...oft.ComponentDetection.Common.Tests.csproj | 8 +- .../Resources/test-file-duplicate.txt | 3 + .../PipCommandServiceTests.cs | 102 +++++++++++++ .../pip/index-removal/requirements.txt | 3 + 11 files changed, 300 insertions(+), 61 deletions(-) create mode 100644 test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs create mode 100644 test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt create mode 100644 test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt diff --git a/docs/detectors/pip.md b/docs/detectors/pip.md index 0e8df8028..b11a15ab7 100644 --- a/docs/detectors/pip.md +++ b/docs/detectors/pip.md @@ -67,3 +67,7 @@ The environment variable `PipReportSkipFallbackOnFailure` is used to skip the de The environment variable `PipReportFileLevelTimeoutSeconds` is used to control the timeout limit for generating the PipReport for individual files. This defaults to the overall timeout. The environment variable `PipReportDisableFastDeps` is used to disable the fast deps feature in PipReport. + +The enviroment variable `PipReportIgnoreFileLevelIndexUrl` is used to ignore the `--index-url` argument that can be provided in the requirements.txt file: https://pip.pypa.io/en/stable/cli/pip_install/#install-index-url + +The enviroment variable `PipReportPersistReports` allows the PipReport detector to persist the reports that it generates, rather than cleaning them up after constructing the dependency graph. diff --git a/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs b/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs index e9a89699c..5faf0d06e 100644 --- a/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs +++ b/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs @@ -1,5 +1,6 @@ namespace Microsoft.ComponentDetection.Common; +using System.Collections.Generic; using System.IO; using System.Threading.Tasks; using Microsoft.ComponentDetection.Contracts; @@ -36,4 +37,35 @@ public Stream MakeFileStream(string fileName) { return new FileStream(fileName, FileMode.Open, FileAccess.Read); } + + /// + public (string DuplicateFilePath, bool CreatedDuplicate) DuplicateFileWithoutLines(string fileName, string removalIndicator) + { + // Read all lines from the file and filter out the lines that start with the removal indicator. + var removedAnyLines = false; + var linesToKeep = new List(); + foreach (var line in File.ReadLines(fileName)) + { + if (line == null || line.Trim().StartsWith(removalIndicator, System.StringComparison.OrdinalIgnoreCase)) + { + removedAnyLines = true; + } + else + { + linesToKeep.Add(line); + } + } + + // If the file did not have any lines to remove, return null. + if (!removedAnyLines) + { + return (null, false); + } + + // Otherwise, write the lines to a new file and return the new file path. + var duplicateFileName = $"temp.{Path.GetFileName(fileName)}"; + var duplicateFilePath = Path.Combine(Path.GetDirectoryName(fileName), duplicateFileName); + File.WriteAllLines(duplicateFilePath, linesToKeep); + return (duplicateFilePath, true); + } } diff --git a/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs b/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs index 819619744..b69574936 100644 --- a/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs @@ -110,12 +110,15 @@ protected Task> GetFileStreamsAsync(DirectoryInfo private async Task ProcessAsync(IObservable processRequests, IDictionary detectorArgs, int maxThreads, CancellationToken cancellationToken = default) { + var threadsToUse = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1; + this.Telemetry["ThreadsUsed"] = $"{threadsToUse}"; + var processor = new ActionBlock( async processRequest => await this.OnFileFoundAsync(processRequest, detectorArgs, cancellationToken), new ExecutionDataflowBlockOptions { // MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in - MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1, + MaxDegreeOfParallelism = threadsToUse, }); var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs, cancellationToken); diff --git a/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs b/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs index a76c1ca61..88c1675c4 100644 --- a/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs +++ b/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs @@ -42,4 +42,12 @@ public interface IFileUtilityService /// Path to the file. /// Returns a stream representing the file. Stream MakeFileStream(string fileName); + + /// + /// Duplicates a file, removing any lines that starts with the given string. + /// + /// Path to the file. + /// The string that indicates a line should be removed. + /// Returns the path of the new file, and whether or not one was created. + (string DuplicateFilePath, bool CreatedDuplicate) DuplicateFileWithoutLines(string fileName, string removalIndicator); } diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs index d6b495d46..8e8bd518c 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs @@ -15,6 +15,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip; public class PipCommandService : IPipCommandService { private const string PipReportDisableFastDepsEnvVar = "PipReportDisableFastDeps"; + private const string PipReportIgnoreFileLevelIndexUrlEnvVar = "PipReportIgnoreFileLevelIndexUrl"; private readonly ICommandLineInvocationService commandLineInvocationService; private readonly IPathUtilityService pathUtilityService; @@ -134,71 +135,93 @@ private async Task ExecuteCommandAsync( var reportName = Path.GetFileNameWithoutExtension(Path.GetRandomFileName()) + ".component-detection-pip-report.json"; var reportFile = new FileInfo(Path.Combine(workingDir.FullName, reportName)); + FileInfo duplicateFile = null; string pipReportCommand; - if (path.EndsWith(".py")) - { - pipReportCommand = $"install -e ."; - } - else if (path.EndsWith(".txt")) - { - pipReportCommand = "install -r requirements.txt"; - } - else - { - // Failure case, but this shouldn't be hit since detection is only running - // on setup.py and requirements.txt files. - this.logger.LogDebug("PipReport: Unsupported file type for pip installation report: {Path}", path); - return (new PipInstallationReport(), null); - } - - // When PIP_INDEX_URL is set, we need to pass it as a parameter to pip install command. - // This should be done before running detection by the build system, otherwise the detection - // will default to the public PyPI index if not configured in pip defaults. Note this index URL may have credentials, we need to remove it when logging. - pipReportCommand += $" --dry-run --ignore-installed --quiet --report {reportName}"; - if (this.environmentService.DoesEnvironmentVariableExist("PIP_INDEX_URL")) - { - pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}"; - } - - if (!this.environmentService.DoesEnvironmentVariableExist(PipReportDisableFastDepsEnvVar) || !this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar)) - { - pipReportCommand += $" --use-feature=fast-deps"; - } - - this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation()); - command = await this.ExecuteCommandAsync( - pipExecutable, - pythonExecutable, - null, - workingDir, - cancellationToken, - pipReportCommand); - - if (command.ExitCode == -1 && cancellationToken.IsCancellationRequested) + try { - var errorMessage = $"PipReport: Cancelled for file '{formattedPath}' with command '{pipReportCommand.RemoveSensitiveInformation()}'."; - using var failureRecord = new PipReportFailureTelemetryRecord + if (path.EndsWith(".py")) + { + pipReportCommand = "install -e ."; + } + else if (path.EndsWith(".txt", StringComparison.OrdinalIgnoreCase)) { - ExitCode = command.ExitCode, - StdErr = $"{errorMessage} {command.StdErr}", - }; + pipReportCommand = "install -r requirements.txt"; + if (this.environmentService.IsEnvironmentVariableValueTrue(PipReportIgnoreFileLevelIndexUrlEnvVar)) + { + // check for --index-url in requirements.txt and remove it from the file, since we want to use PIP_INDEX_URL from the environment. + var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(formattedPath, "--index-url"); + if (createdDuplicate) + { + var duplicateFileName = Path.GetFileName(duplicateFilePath); + duplicateFile = new FileInfo(duplicateFilePath); + pipReportCommand = $"install -r {duplicateFileName}"; + } + } + } + else + { + // Failure case, but this shouldn't be hit since detection is only running + // on setup.py and requirements.txt files. + this.logger.LogDebug("PipReport: Unsupported file type for pip installation report: {Path}", path); + return (new PipInstallationReport(), null); + } + + // When PIP_INDEX_URL is set, we need to pass it as a parameter to pip install command. + // This should be done before running detection by the build system, otherwise the detection + // will default to the public PyPI index if not configured in pip defaults. Note this index URL may have credentials, we need to remove it when logging. + pipReportCommand += $" --dry-run --ignore-installed --quiet --no-input --report {reportName}"; + if (this.environmentService.DoesEnvironmentVariableExist("PIP_INDEX_URL")) + { + pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}"; + } - this.logger.LogWarning("{Error}", errorMessage); - throw new InvalidOperationException(errorMessage); + if (!this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar)) + { + pipReportCommand += " --use-feature=fast-deps"; + } + + this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation()); + command = await this.ExecuteCommandAsync( + pipExecutable, + pythonExecutable, + null, + workingDir, + cancellationToken, + pipReportCommand); + + if (command.ExitCode == -1 && cancellationToken.IsCancellationRequested) + { + var errorMessage = $"PipReport: Cancelled for file '{formattedPath}' with command '{pipReportCommand.RemoveSensitiveInformation()}'."; + using var failureRecord = new PipReportFailureTelemetryRecord + { + ExitCode = command.ExitCode, + StdErr = $"{errorMessage} {command.StdErr}", + }; + + this.logger.LogWarning("{Error}", errorMessage); + throw new InvalidOperationException(errorMessage); + } + else if (command.ExitCode != 0) + { + using var failureRecord = new PipReportFailureTelemetryRecord + { + ExitCode = command.ExitCode, + StdErr = command.StdErr, + }; + + this.logger.LogDebug("PipReport: Pip installation report error: {StdErr}", command.StdErr); + throw new InvalidOperationException($"PipReport: Failed to generate pip installation report for file {path} with exit code {command.ExitCode}"); + } + + var reportOutput = await this.fileUtilityService.ReadAllTextAsync(reportFile); + return (JsonConvert.DeserializeObject(reportOutput), reportFile); } - else if (command.ExitCode != 0) + finally { - using var failureRecord = new PipReportFailureTelemetryRecord + if (duplicateFile != null && duplicateFile.Exists) { - ExitCode = command.ExitCode, - StdErr = command.StdErr, - }; - - this.logger.LogDebug("PipReport: Pip installation report error: {StdErr}", command.StdErr); - throw new InvalidOperationException($"PipReport: Failed to generate pip installation report for file {path} with exit code {command.ExitCode}"); + duplicateFile.Delete(); + } } - - var reportOutput = await this.fileUtilityService.ReadAllTextAsync(reportFile); - return (JsonConvert.DeserializeObject(reportOutput), reportFile); } } diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs index 59b68e0f1..967c5978f 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs @@ -77,7 +77,7 @@ private enum PipReportOverrideBehavior public override IEnumerable SupportedComponentTypes { get; } = new[] { ComponentType.Pip }; - public override int Version { get; } = 6; + public override int Version { get; } = 7; protected override bool EnableParallelism { get; set; } = true; diff --git a/test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs new file mode 100644 index 000000000..ca44ae84a --- /dev/null +++ b/test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs @@ -0,0 +1,55 @@ +namespace Microsoft.ComponentDetection.Common.Tests; + +using System.IO; +using FluentAssertions; +using Microsoft.ComponentDetection.Contracts; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +[TestClass] +[TestCategory("Governance/All")] +[TestCategory("Governance/ComponentDetection")] +public class FileUtilityServiceTests +{ + private readonly IFileUtilityService fileUtilityService; + + public FileUtilityServiceTests() => + this.fileUtilityService = new FileUtilityService(); + + [TestMethod] + public void DuplicateFileWithoutLines_WithLinesToRemove_ShouldCreateDuplicateFileWithoutLines() + { + // Get directory of current executing assembly + var directory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location); + + // Arrange + var fileName = $"{directory}/Resources/test-file-duplicate.txt"; + var removalIndicator = "//REMOVE"; + var expectedDuplicateFilePath = Path.Combine(directory, "Resources", "temp.test-file-duplicate.txt"); + + // Act + var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(fileName, removalIndicator); + + // Assert + createdDuplicate.Should().BeTrue(); + duplicateFilePath.Should().Be(expectedDuplicateFilePath); + File.Exists(expectedDuplicateFilePath).Should().BeTrue(); + } + + [TestMethod] + public void DuplicateFileWithoutLines_WithoutLinesToRemove_ShouldNotCreateDuplicateFile() + { + // Get directory of current executing assembly + var directory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location); + + // Arrange + var fileName = $"{directory}/Resources/test-file-duplicate.txt"; + var removalIndicator = "//NOTEXIST"; + + // Act + var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(fileName, removalIndicator); + + // Assert + createdDuplicate.Should().BeFalse(); + duplicateFilePath.Should().BeNull(); + } +} diff --git a/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj b/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj index 3aed7eac5..8e9b5955a 100644 --- a/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj +++ b/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj @@ -1,4 +1,4 @@ - + @@ -13,4 +13,10 @@ + + + Always + + + diff --git a/test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt b/test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt new file mode 100644 index 000000000..9e8b2a86b --- /dev/null +++ b/test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt @@ -0,0 +1,3 @@ +hello +//REMOVE +world diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs index 3cebac312..2ecf76113 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs @@ -492,6 +492,108 @@ public async Task PipCommandService_GeneratesReport_MultiRequirementsTxt_Correct this.commandLineInvokationService.Verify(); } + [TestMethod] + public async Task PipCommandService_GeneratesReport_WithDuplicate_Async() + { + var testPath = Path.Join(Directory.GetCurrentDirectory(), string.Join(Guid.NewGuid().ToString(), "requirements.txt")); + + this.envVarService.Setup(x => x.IsEnvironmentVariableValueTrue("PipReportIgnoreFileLevelIndexUrl")).Returns(true); + this.commandLineInvokationService.Setup(x => x.CanCommandBeLocatedAsync("pip", It.IsAny>(), "--version")).ReturnsAsync(true); + + this.fileUtilityService.Setup(x => x.DuplicateFileWithoutLines(It.IsAny(), "--index-url")) + .Returns(("C:/asdf/temp.requirements.txt", true)) + .Verifiable(); + this.fileUtilityService.Setup(x => x.ReadAllTextAsync(It.IsAny())) + .ReturnsAsync(TestResources.pip_report_multi_pkg); + + this.commandLineInvokationService.Setup(x => x.ExecuteCommandAsync( + "pip", + It.IsAny>(), + It.Is(d => d.FullName.Contains(Directory.GetCurrentDirectory(), StringComparison.OrdinalIgnoreCase)), + It.IsAny(), + It.Is(s => s.Contains("temp.requirements.txt", StringComparison.OrdinalIgnoreCase)))) + .ReturnsAsync(new CommandLineExecutionResult { ExitCode = 0, StdErr = string.Empty, StdOut = string.Empty }) + .Verifiable(); + + var service = new PipCommandService( + this.commandLineInvokationService.Object, + this.pathUtilityService, + this.fileUtilityService.Object, + this.envVarService.Object, + this.logger.Object); + + var (report, reportFile) = await service.GenerateInstallationReportAsync(testPath); + this.fileUtilityService.Verify(); + this.commandLineInvokationService.Verify(); + } + + [TestMethod] + public async Task PipCommandService_GeneratesReport_WithoutDuplicate_Async() + { + var testPath = Path.Join(Directory.GetCurrentDirectory(), string.Join(Guid.NewGuid().ToString(), "requirements.txt")); + + this.envVarService.Setup(x => x.IsEnvironmentVariableValueTrue("PipReportIgnoreFileLevelIndexUrl")).Returns(true); + this.commandLineInvokationService.Setup(x => x.CanCommandBeLocatedAsync("pip", It.IsAny>(), "--version")).ReturnsAsync(true); + + this.fileUtilityService.Setup(x => x.DuplicateFileWithoutLines(It.IsAny(), "--index-url")) + .Returns((null, false)) + .Verifiable(); + this.fileUtilityService.Setup(x => x.ReadAllTextAsync(It.IsAny())) + .ReturnsAsync(TestResources.pip_report_multi_pkg); + + this.commandLineInvokationService.Setup(x => x.ExecuteCommandAsync( + "pip", + It.IsAny>(), + It.Is(d => d.FullName.Contains(Directory.GetCurrentDirectory(), StringComparison.OrdinalIgnoreCase)), + It.IsAny(), + It.Is(s => !s.Contains("temp.requirements.txt", StringComparison.OrdinalIgnoreCase)))) + .ReturnsAsync(new CommandLineExecutionResult { ExitCode = 0, StdErr = string.Empty, StdOut = string.Empty }) + .Verifiable(); + + var service = new PipCommandService( + this.commandLineInvokationService.Object, + this.pathUtilityService, + this.fileUtilityService.Object, + this.envVarService.Object, + this.logger.Object); + + var (report, reportFile) = await service.GenerateInstallationReportAsync(testPath); + this.fileUtilityService.Verify(); + this.commandLineInvokationService.Verify(); + } + + [TestMethod] + public async Task PipCommandService_GeneratesReport_UseFileIndex_Async() + { + var testPath = Path.Join(Directory.GetCurrentDirectory(), string.Join(Guid.NewGuid().ToString(), "requirements.txt")); + + this.envVarService.Setup(x => x.IsEnvironmentVariableValueTrue("PipReportIgnoreFileLevelIndexUrl")).Returns(false); + this.commandLineInvokationService.Setup(x => x.CanCommandBeLocatedAsync("pip", It.IsAny>(), "--version")).ReturnsAsync(true); + + this.fileUtilityService.Setup(x => x.ReadAllTextAsync(It.IsAny())) + .ReturnsAsync(TestResources.pip_report_multi_pkg); + + this.commandLineInvokationService.Setup(x => x.ExecuteCommandAsync( + "pip", + It.IsAny>(), + It.Is(d => d.FullName.Contains(Directory.GetCurrentDirectory(), StringComparison.OrdinalIgnoreCase)), + It.IsAny(), + It.Is(s => !s.Contains("temp.requirements.txt", StringComparison.OrdinalIgnoreCase)))) + .ReturnsAsync(new CommandLineExecutionResult { ExitCode = 0, StdErr = string.Empty, StdOut = string.Empty }) + .Verifiable(); + + var service = new PipCommandService( + this.commandLineInvokationService.Object, + this.pathUtilityService, + this.fileUtilityService.Object, + this.envVarService.Object, + this.logger.Object); + + var (report, reportFile) = await service.GenerateInstallationReportAsync(testPath); + this.fileUtilityService.Verify(x => x.DuplicateFileWithoutLines(It.IsAny(), "--index-url"), Times.Never); + this.commandLineInvokationService.Verify(); + } + [TestMethod] public async Task PipCommandService_GeneratesReport_BadFile_FailsAsync() { diff --git a/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt b/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt new file mode 100644 index 000000000..391be3370 --- /dev/null +++ b/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt @@ -0,0 +1,3 @@ +--index-url https://pkgs.dev.azure.com/msazure/_packaging/Avere-internal-python/pypi/simple + +adal==1.2.7 ; python_full_version >= "3.8.4" and python_full_version < "4.0.0"