diff --git a/docs/detectors/pip.md b/docs/detectors/pip.md
index 0e8df8028..b11a15ab7 100644
--- a/docs/detectors/pip.md
+++ b/docs/detectors/pip.md
@@ -67,3 +67,7 @@ The environment variable `PipReportSkipFallbackOnFailure` is used to skip the de
The environment variable `PipReportFileLevelTimeoutSeconds` is used to control the timeout limit for generating the PipReport for individual files. This defaults to the overall timeout.
The environment variable `PipReportDisableFastDeps` is used to disable the fast deps feature in PipReport.
+
+The enviroment variable `PipReportIgnoreFileLevelIndexUrl` is used to ignore the `--index-url` argument that can be provided in the requirements.txt file: https://pip.pypa.io/en/stable/cli/pip_install/#install-index-url
+
+The enviroment variable `PipReportPersistReports` allows the PipReport detector to persist the reports that it generates, rather than cleaning them up after constructing the dependency graph.
diff --git a/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs b/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs
index e9a89699c..5faf0d06e 100644
--- a/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs
+++ b/src/Microsoft.ComponentDetection.Common/FileUtilityService.cs
@@ -1,5 +1,6 @@
namespace Microsoft.ComponentDetection.Common;
+using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
@@ -36,4 +37,35 @@ public Stream MakeFileStream(string fileName)
{
return new FileStream(fileName, FileMode.Open, FileAccess.Read);
}
+
+ ///
+ public (string DuplicateFilePath, bool CreatedDuplicate) DuplicateFileWithoutLines(string fileName, string removalIndicator)
+ {
+ // Read all lines from the file and filter out the lines that start with the removal indicator.
+ var removedAnyLines = false;
+ var linesToKeep = new List();
+ foreach (var line in File.ReadLines(fileName))
+ {
+ if (line == null || line.Trim().StartsWith(removalIndicator, System.StringComparison.OrdinalIgnoreCase))
+ {
+ removedAnyLines = true;
+ }
+ else
+ {
+ linesToKeep.Add(line);
+ }
+ }
+
+ // If the file did not have any lines to remove, return null.
+ if (!removedAnyLines)
+ {
+ return (null, false);
+ }
+
+ // Otherwise, write the lines to a new file and return the new file path.
+ var duplicateFileName = $"temp.{Path.GetFileName(fileName)}";
+ var duplicateFilePath = Path.Combine(Path.GetDirectoryName(fileName), duplicateFileName);
+ File.WriteAllLines(duplicateFilePath, linesToKeep);
+ return (duplicateFilePath, true);
+ }
}
diff --git a/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs b/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs
index 819619744..b69574936 100644
--- a/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs
+++ b/src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs
@@ -110,12 +110,15 @@ protected Task> GetFileStreamsAsync(DirectoryInfo
private async Task ProcessAsync(IObservable processRequests, IDictionary detectorArgs, int maxThreads, CancellationToken cancellationToken = default)
{
+ var threadsToUse = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1;
+ this.Telemetry["ThreadsUsed"] = $"{threadsToUse}";
+
var processor = new ActionBlock(
async processRequest => await this.OnFileFoundAsync(processRequest, detectorArgs, cancellationToken),
new ExecutionDataflowBlockOptions
{
// MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in
- MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
+ MaxDegreeOfParallelism = threadsToUse,
});
var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs, cancellationToken);
diff --git a/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs b/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs
index a76c1ca61..88c1675c4 100644
--- a/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs
+++ b/src/Microsoft.ComponentDetection.Contracts/IFileUtilityService.cs
@@ -42,4 +42,12 @@ public interface IFileUtilityService
/// Path to the file.
/// Returns a stream representing the file.
Stream MakeFileStream(string fileName);
+
+ ///
+ /// Duplicates a file, removing any lines that starts with the given string.
+ ///
+ /// Path to the file.
+ /// The string that indicates a line should be removed.
+ /// Returns the path of the new file, and whether or not one was created.
+ (string DuplicateFilePath, bool CreatedDuplicate) DuplicateFileWithoutLines(string fileName, string removalIndicator);
}
diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs
index d6b495d46..8e8bd518c 100644
--- a/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs
@@ -15,6 +15,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
public class PipCommandService : IPipCommandService
{
private const string PipReportDisableFastDepsEnvVar = "PipReportDisableFastDeps";
+ private const string PipReportIgnoreFileLevelIndexUrlEnvVar = "PipReportIgnoreFileLevelIndexUrl";
private readonly ICommandLineInvocationService commandLineInvocationService;
private readonly IPathUtilityService pathUtilityService;
@@ -134,71 +135,93 @@ private async Task ExecuteCommandAsync(
var reportName = Path.GetFileNameWithoutExtension(Path.GetRandomFileName()) + ".component-detection-pip-report.json";
var reportFile = new FileInfo(Path.Combine(workingDir.FullName, reportName));
+ FileInfo duplicateFile = null;
string pipReportCommand;
- if (path.EndsWith(".py"))
- {
- pipReportCommand = $"install -e .";
- }
- else if (path.EndsWith(".txt"))
- {
- pipReportCommand = "install -r requirements.txt";
- }
- else
- {
- // Failure case, but this shouldn't be hit since detection is only running
- // on setup.py and requirements.txt files.
- this.logger.LogDebug("PipReport: Unsupported file type for pip installation report: {Path}", path);
- return (new PipInstallationReport(), null);
- }
-
- // When PIP_INDEX_URL is set, we need to pass it as a parameter to pip install command.
- // This should be done before running detection by the build system, otherwise the detection
- // will default to the public PyPI index if not configured in pip defaults. Note this index URL may have credentials, we need to remove it when logging.
- pipReportCommand += $" --dry-run --ignore-installed --quiet --report {reportName}";
- if (this.environmentService.DoesEnvironmentVariableExist("PIP_INDEX_URL"))
- {
- pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}";
- }
-
- if (!this.environmentService.DoesEnvironmentVariableExist(PipReportDisableFastDepsEnvVar) || !this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar))
- {
- pipReportCommand += $" --use-feature=fast-deps";
- }
-
- this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation());
- command = await this.ExecuteCommandAsync(
- pipExecutable,
- pythonExecutable,
- null,
- workingDir,
- cancellationToken,
- pipReportCommand);
-
- if (command.ExitCode == -1 && cancellationToken.IsCancellationRequested)
+ try
{
- var errorMessage = $"PipReport: Cancelled for file '{formattedPath}' with command '{pipReportCommand.RemoveSensitiveInformation()}'.";
- using var failureRecord = new PipReportFailureTelemetryRecord
+ if (path.EndsWith(".py"))
+ {
+ pipReportCommand = "install -e .";
+ }
+ else if (path.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
{
- ExitCode = command.ExitCode,
- StdErr = $"{errorMessage} {command.StdErr}",
- };
+ pipReportCommand = "install -r requirements.txt";
+ if (this.environmentService.IsEnvironmentVariableValueTrue(PipReportIgnoreFileLevelIndexUrlEnvVar))
+ {
+ // check for --index-url in requirements.txt and remove it from the file, since we want to use PIP_INDEX_URL from the environment.
+ var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(formattedPath, "--index-url");
+ if (createdDuplicate)
+ {
+ var duplicateFileName = Path.GetFileName(duplicateFilePath);
+ duplicateFile = new FileInfo(duplicateFilePath);
+ pipReportCommand = $"install -r {duplicateFileName}";
+ }
+ }
+ }
+ else
+ {
+ // Failure case, but this shouldn't be hit since detection is only running
+ // on setup.py and requirements.txt files.
+ this.logger.LogDebug("PipReport: Unsupported file type for pip installation report: {Path}", path);
+ return (new PipInstallationReport(), null);
+ }
+
+ // When PIP_INDEX_URL is set, we need to pass it as a parameter to pip install command.
+ // This should be done before running detection by the build system, otherwise the detection
+ // will default to the public PyPI index if not configured in pip defaults. Note this index URL may have credentials, we need to remove it when logging.
+ pipReportCommand += $" --dry-run --ignore-installed --quiet --no-input --report {reportName}";
+ if (this.environmentService.DoesEnvironmentVariableExist("PIP_INDEX_URL"))
+ {
+ pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}";
+ }
- this.logger.LogWarning("{Error}", errorMessage);
- throw new InvalidOperationException(errorMessage);
+ if (!this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar))
+ {
+ pipReportCommand += " --use-feature=fast-deps";
+ }
+
+ this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation());
+ command = await this.ExecuteCommandAsync(
+ pipExecutable,
+ pythonExecutable,
+ null,
+ workingDir,
+ cancellationToken,
+ pipReportCommand);
+
+ if (command.ExitCode == -1 && cancellationToken.IsCancellationRequested)
+ {
+ var errorMessage = $"PipReport: Cancelled for file '{formattedPath}' with command '{pipReportCommand.RemoveSensitiveInformation()}'.";
+ using var failureRecord = new PipReportFailureTelemetryRecord
+ {
+ ExitCode = command.ExitCode,
+ StdErr = $"{errorMessage} {command.StdErr}",
+ };
+
+ this.logger.LogWarning("{Error}", errorMessage);
+ throw new InvalidOperationException(errorMessage);
+ }
+ else if (command.ExitCode != 0)
+ {
+ using var failureRecord = new PipReportFailureTelemetryRecord
+ {
+ ExitCode = command.ExitCode,
+ StdErr = command.StdErr,
+ };
+
+ this.logger.LogDebug("PipReport: Pip installation report error: {StdErr}", command.StdErr);
+ throw new InvalidOperationException($"PipReport: Failed to generate pip installation report for file {path} with exit code {command.ExitCode}");
+ }
+
+ var reportOutput = await this.fileUtilityService.ReadAllTextAsync(reportFile);
+ return (JsonConvert.DeserializeObject(reportOutput), reportFile);
}
- else if (command.ExitCode != 0)
+ finally
{
- using var failureRecord = new PipReportFailureTelemetryRecord
+ if (duplicateFile != null && duplicateFile.Exists)
{
- ExitCode = command.ExitCode,
- StdErr = command.StdErr,
- };
-
- this.logger.LogDebug("PipReport: Pip installation report error: {StdErr}", command.StdErr);
- throw new InvalidOperationException($"PipReport: Failed to generate pip installation report for file {path} with exit code {command.ExitCode}");
+ duplicateFile.Delete();
+ }
}
-
- var reportOutput = await this.fileUtilityService.ReadAllTextAsync(reportFile);
- return (JsonConvert.DeserializeObject(reportOutput), reportFile);
}
}
diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs
index 59b68e0f1..967c5978f 100644
--- a/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs
@@ -77,7 +77,7 @@ private enum PipReportOverrideBehavior
public override IEnumerable SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
- public override int Version { get; } = 6;
+ public override int Version { get; } = 7;
protected override bool EnableParallelism { get; set; } = true;
diff --git a/test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs
new file mode 100644
index 000000000..ca44ae84a
--- /dev/null
+++ b/test/Microsoft.ComponentDetection.Common.Tests/FileUtilityServiceTests.cs
@@ -0,0 +1,55 @@
+namespace Microsoft.ComponentDetection.Common.Tests;
+
+using System.IO;
+using FluentAssertions;
+using Microsoft.ComponentDetection.Contracts;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+[TestClass]
+[TestCategory("Governance/All")]
+[TestCategory("Governance/ComponentDetection")]
+public class FileUtilityServiceTests
+{
+ private readonly IFileUtilityService fileUtilityService;
+
+ public FileUtilityServiceTests() =>
+ this.fileUtilityService = new FileUtilityService();
+
+ [TestMethod]
+ public void DuplicateFileWithoutLines_WithLinesToRemove_ShouldCreateDuplicateFileWithoutLines()
+ {
+ // Get directory of current executing assembly
+ var directory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
+
+ // Arrange
+ var fileName = $"{directory}/Resources/test-file-duplicate.txt";
+ var removalIndicator = "//REMOVE";
+ var expectedDuplicateFilePath = Path.Combine(directory, "Resources", "temp.test-file-duplicate.txt");
+
+ // Act
+ var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(fileName, removalIndicator);
+
+ // Assert
+ createdDuplicate.Should().BeTrue();
+ duplicateFilePath.Should().Be(expectedDuplicateFilePath);
+ File.Exists(expectedDuplicateFilePath).Should().BeTrue();
+ }
+
+ [TestMethod]
+ public void DuplicateFileWithoutLines_WithoutLinesToRemove_ShouldNotCreateDuplicateFile()
+ {
+ // Get directory of current executing assembly
+ var directory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
+
+ // Arrange
+ var fileName = $"{directory}/Resources/test-file-duplicate.txt";
+ var removalIndicator = "//NOTEXIST";
+
+ // Act
+ var (duplicateFilePath, createdDuplicate) = this.fileUtilityService.DuplicateFileWithoutLines(fileName, removalIndicator);
+
+ // Assert
+ createdDuplicate.Should().BeFalse();
+ duplicateFilePath.Should().BeNull();
+ }
+}
diff --git a/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj b/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj
index 3aed7eac5..8e9b5955a 100644
--- a/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj
+++ b/test/Microsoft.ComponentDetection.Common.Tests/Microsoft.ComponentDetection.Common.Tests.csproj
@@ -1,4 +1,4 @@
-
+
@@ -13,4 +13,10 @@
+
+
+ Always
+
+
+
diff --git a/test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt b/test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt
new file mode 100644
index 000000000..9e8b2a86b
--- /dev/null
+++ b/test/Microsoft.ComponentDetection.Common.Tests/Resources/test-file-duplicate.txt
@@ -0,0 +1,3 @@
+hello
+//REMOVE
+world
diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs
index 3cebac312..2ecf76113 100644
--- a/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs
+++ b/test/Microsoft.ComponentDetection.Detectors.Tests/PipCommandServiceTests.cs
@@ -492,6 +492,108 @@ public async Task PipCommandService_GeneratesReport_MultiRequirementsTxt_Correct
this.commandLineInvokationService.Verify();
}
+ [TestMethod]
+ public async Task PipCommandService_GeneratesReport_WithDuplicate_Async()
+ {
+ var testPath = Path.Join(Directory.GetCurrentDirectory(), string.Join(Guid.NewGuid().ToString(), "requirements.txt"));
+
+ this.envVarService.Setup(x => x.IsEnvironmentVariableValueTrue("PipReportIgnoreFileLevelIndexUrl")).Returns(true);
+ this.commandLineInvokationService.Setup(x => x.CanCommandBeLocatedAsync("pip", It.IsAny>(), "--version")).ReturnsAsync(true);
+
+ this.fileUtilityService.Setup(x => x.DuplicateFileWithoutLines(It.IsAny(), "--index-url"))
+ .Returns(("C:/asdf/temp.requirements.txt", true))
+ .Verifiable();
+ this.fileUtilityService.Setup(x => x.ReadAllTextAsync(It.IsAny()))
+ .ReturnsAsync(TestResources.pip_report_multi_pkg);
+
+ this.commandLineInvokationService.Setup(x => x.ExecuteCommandAsync(
+ "pip",
+ It.IsAny>(),
+ It.Is(d => d.FullName.Contains(Directory.GetCurrentDirectory(), StringComparison.OrdinalIgnoreCase)),
+ It.IsAny(),
+ It.Is(s => s.Contains("temp.requirements.txt", StringComparison.OrdinalIgnoreCase))))
+ .ReturnsAsync(new CommandLineExecutionResult { ExitCode = 0, StdErr = string.Empty, StdOut = string.Empty })
+ .Verifiable();
+
+ var service = new PipCommandService(
+ this.commandLineInvokationService.Object,
+ this.pathUtilityService,
+ this.fileUtilityService.Object,
+ this.envVarService.Object,
+ this.logger.Object);
+
+ var (report, reportFile) = await service.GenerateInstallationReportAsync(testPath);
+ this.fileUtilityService.Verify();
+ this.commandLineInvokationService.Verify();
+ }
+
+ [TestMethod]
+ public async Task PipCommandService_GeneratesReport_WithoutDuplicate_Async()
+ {
+ var testPath = Path.Join(Directory.GetCurrentDirectory(), string.Join(Guid.NewGuid().ToString(), "requirements.txt"));
+
+ this.envVarService.Setup(x => x.IsEnvironmentVariableValueTrue("PipReportIgnoreFileLevelIndexUrl")).Returns(true);
+ this.commandLineInvokationService.Setup(x => x.CanCommandBeLocatedAsync("pip", It.IsAny>(), "--version")).ReturnsAsync(true);
+
+ this.fileUtilityService.Setup(x => x.DuplicateFileWithoutLines(It.IsAny(), "--index-url"))
+ .Returns((null, false))
+ .Verifiable();
+ this.fileUtilityService.Setup(x => x.ReadAllTextAsync(It.IsAny()))
+ .ReturnsAsync(TestResources.pip_report_multi_pkg);
+
+ this.commandLineInvokationService.Setup(x => x.ExecuteCommandAsync(
+ "pip",
+ It.IsAny>(),
+ It.Is(d => d.FullName.Contains(Directory.GetCurrentDirectory(), StringComparison.OrdinalIgnoreCase)),
+ It.IsAny(),
+ It.Is(s => !s.Contains("temp.requirements.txt", StringComparison.OrdinalIgnoreCase))))
+ .ReturnsAsync(new CommandLineExecutionResult { ExitCode = 0, StdErr = string.Empty, StdOut = string.Empty })
+ .Verifiable();
+
+ var service = new PipCommandService(
+ this.commandLineInvokationService.Object,
+ this.pathUtilityService,
+ this.fileUtilityService.Object,
+ this.envVarService.Object,
+ this.logger.Object);
+
+ var (report, reportFile) = await service.GenerateInstallationReportAsync(testPath);
+ this.fileUtilityService.Verify();
+ this.commandLineInvokationService.Verify();
+ }
+
+ [TestMethod]
+ public async Task PipCommandService_GeneratesReport_UseFileIndex_Async()
+ {
+ var testPath = Path.Join(Directory.GetCurrentDirectory(), string.Join(Guid.NewGuid().ToString(), "requirements.txt"));
+
+ this.envVarService.Setup(x => x.IsEnvironmentVariableValueTrue("PipReportIgnoreFileLevelIndexUrl")).Returns(false);
+ this.commandLineInvokationService.Setup(x => x.CanCommandBeLocatedAsync("pip", It.IsAny>(), "--version")).ReturnsAsync(true);
+
+ this.fileUtilityService.Setup(x => x.ReadAllTextAsync(It.IsAny()))
+ .ReturnsAsync(TestResources.pip_report_multi_pkg);
+
+ this.commandLineInvokationService.Setup(x => x.ExecuteCommandAsync(
+ "pip",
+ It.IsAny>(),
+ It.Is(d => d.FullName.Contains(Directory.GetCurrentDirectory(), StringComparison.OrdinalIgnoreCase)),
+ It.IsAny(),
+ It.Is(s => !s.Contains("temp.requirements.txt", StringComparison.OrdinalIgnoreCase))))
+ .ReturnsAsync(new CommandLineExecutionResult { ExitCode = 0, StdErr = string.Empty, StdOut = string.Empty })
+ .Verifiable();
+
+ var service = new PipCommandService(
+ this.commandLineInvokationService.Object,
+ this.pathUtilityService,
+ this.fileUtilityService.Object,
+ this.envVarService.Object,
+ this.logger.Object);
+
+ var (report, reportFile) = await service.GenerateInstallationReportAsync(testPath);
+ this.fileUtilityService.Verify(x => x.DuplicateFileWithoutLines(It.IsAny(), "--index-url"), Times.Never);
+ this.commandLineInvokationService.Verify();
+ }
+
[TestMethod]
public async Task PipCommandService_GeneratesReport_BadFile_FailsAsync()
{
diff --git a/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt b/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt
new file mode 100644
index 000000000..391be3370
--- /dev/null
+++ b/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/index-removal/requirements.txt
@@ -0,0 +1,3 @@
+--index-url https://pkgs.dev.azure.com/msazure/_packaging/Avere-internal-python/pypi/simple
+
+adal==1.2.7 ; python_full_version >= "3.8.4" and python_full_version < "4.0.0"