Skip to content

Commit

Permalink
Users/oakeredolu/newpipdetector (#684)
Browse files Browse the repository at this point in the history
* Added a new defaultOff component detector for the new PyPI API.
  • Loading branch information
Omotola committed Aug 8, 2023
1 parent 69dc42b commit 0afc58b
Show file tree
Hide file tree
Showing 6 changed files with 604 additions and 11 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;

using System;
using System.Collections.Generic;
using System.Linq;
using System.Reactive.Linq;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.Internal;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;

/// <summary>
/// Default-off file detector (id "SimplePip") for <c>setup.py</c> and <c>requirements.txt</c> files.
/// Each file is parsed through <see cref="IPythonCommandService"/>, the listed packages are resolved
/// through <see cref="ISimplePythonResolver"/>, and the resulting components are registered with the
/// file's component recorder.
/// </summary>
public class SimplePipComponentDetector : FileComponentDetector, IDefaultOffComponentDetector
{
    private readonly IPythonCommandService pythonCommandService;
    private readonly ISimplePythonResolver pythonResolver;

    /// <summary>
    /// Initializes a new instance of the <see cref="SimplePipComponentDetector"/> class.
    /// </summary>
    /// <param name="componentStreamEnumerableFactory">Assigned to <c>ComponentStreamEnumerableFactory</c>.</param>
    /// <param name="walkerFactory">Assigned to <c>Scanner</c>.</param>
    /// <param name="pythonCommandService">Used to check for a python executable and to parse manifest files.</param>
    /// <param name="pythonResolver">Used to resolve the listed packages into a dependency graph.</param>
    /// <param name="logger">Assigned to <c>Logger</c>.</param>
    public SimplePipComponentDetector(
        IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
        IObservableDirectoryWalkerFactory walkerFactory,
        IPythonCommandService pythonCommandService,
        ISimplePythonResolver pythonResolver,
        ILogger<SimplePipComponentDetector> logger)
    {
        this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
        this.Scanner = walkerFactory;
        this.pythonCommandService = pythonCommandService;
        this.pythonResolver = pythonResolver;
        this.Logger = logger;
    }

    /// <inheritdoc />
    public override string Id => "SimplePip";

    /// <inheritdoc />
    public override IList<string> SearchPatterns => new List<string> { "setup.py", "requirements.txt" };

    /// <inheritdoc />
    public override IEnumerable<string> Categories => new List<string> { "Python" };

    /// <inheritdoc />
    public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

    /// <inheritdoc />
    public override int Version { get; } = 1;

    /// <summary>
    /// Lets the incoming requests through only when the python command service reports that python exists;
    /// otherwise returns an empty observable so no files are processed.
    /// </summary>
    /// <param name="processRequests">The file requests discovered for this detector.</param>
    /// <param name="detectorArgs">Not read here; the python path is taken from <c>CurrentScanRequest.DetectorArgs</c>.</param>
    /// <returns><paramref name="processRequests"/> unchanged, or an empty observable when python is unavailable.</returns>
    protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
    {
        // pythonExePath stays null when the argument is absent; the result of TryGetValue is intentionally ignored.
        this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
        if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
        {
            this.Logger.LogInformation("No python found on system. Python detection will not run.");

            return Enumerable.Empty<ProcessRequest>().ToObservable();
        }

        return processRequests;
    }

    /// <summary>
    /// Parses one manifest file, resolves its named packages, and registers every resulting component.
    /// Any exception is logged together with the file location and is not rethrown.
    /// </summary>
    /// <param name="processRequest">Supplies the component stream (file) and the recorder to register into.</param>
    /// <param name="detectorArgs">Not read here; the python path is taken from <c>CurrentScanRequest.DetectorArgs</c>.</param>
    /// <returns>A task that completes when the file has been processed.</returns>
    protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs)
    {
        this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
        var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
        var file = processRequest.ComponentStream;

        try
        {
            var initialPackages = await this.pythonCommandService.ParseFileAsync(file.Location, pythonExePath);

            // Entries carrying a package string are turned into specifications and resolved below.
            var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null)
                .Select(tuple => tuple.PackageString)
                .Where(x => !string.IsNullOrWhiteSpace(x))
                .Select(x => new PipDependencySpecification(x))
                .Where(x => !x.PackageIsUnsafe())
                .ToList();

            var roots = await this.pythonResolver.ResolveRootsAsync(singleFileComponentRecorder, listedPackage);

            RecordComponents(
                singleFileComponentRecorder,
                roots);

            // Entries that already carry a component are registered directly as explicit references.
            foreach (var tuple in initialPackages.Where(tuple => tuple.Component != null))
            {
                singleFileComponentRecorder.RegisterUsage(
                    new DetectedComponent(tuple.Component),
                    isExplicitReferencedDependency: true);
            }
        }
        catch (Exception e)
        {
            this.Logger.LogError(e, "Error while parsing pip components in {File}", file.Location);
        }
    }

    /// <summary>
    /// Registers the resolved graph with the recorder: roots as explicit references, then every
    /// descendant breadth-first with its parent's component id.
    /// </summary>
    /// <param name="recorder">The recorder receiving the usages.</param>
    /// <param name="roots">The root nodes of the resolved graph.</param>
    private static void RecordComponents(
        ISingleFileComponentRecorder recorder,
        IList<PipGraphNode> roots)
    {
        var nonRoots = new Queue<(DetectedComponent, PipGraphNode)>();

        foreach (var root in roots)
        {
            var rootDetectedComponent = new DetectedComponent(root.Value);

            recorder.RegisterUsage(
                rootDetectedComponent,
                isExplicitReferencedDependency: true);

            foreach (var child in root.Children)
            {
                nonRoots.Enqueue((rootDetectedComponent, child));
            }
        }

        var registeredIds = new HashSet<string>();

        while (nonRoots.Count > 0)
        {
            var (parent, item) = nonRoots.Dequeue();

            var detectedComponent = new DetectedComponent(item.Value);

            // The usage is registered for every (parent, child) edge, even when the child was seen before.
            recorder.RegisterUsage(
                detectedComponent,
                parentComponentId: parent.Component.Id);

            // Add returns false for an id already seen, so each component's children are enqueued only once.
            if (registeredIds.Add(detectedComponent.Component.Id))
            {
                foreach (var child in item.Children)
                {
                    nonRoots.Enqueue((detectedComponent, child));
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;

public class SimplePythonResolver : ISimplePythonResolver
{
private static readonly Regex VersionRegex = new(@"-(\d+\.\d+(\.\d)*)(.tar|-)", RegexOptions.Compiled);
private static readonly Regex VersionRegex = new(@"-(\d+(\.)\w+((\+|\.)\w*)*)(.tar|-)", RegexOptions.Compiled);

private readonly ISimplePyPiClient simplePypiClient;
private readonly ILogger<SimplePythonResolver> logger;
Expand Down Expand Up @@ -100,10 +100,18 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec

var simplePythonProject = await this.simplePypiClient.GetSimplePypiProjectAsync(rootPackage);

if (simplePythonProject != null && simplePythonProject.Files.Any())
if (simplePythonProject == null || !simplePythonProject.Files.Any())
{
var pythonProject = this.ConvertSimplePypiProjectToSortedDictionary(simplePythonProject, rootPackage);
this.logger.LogWarning(
"Root dependency {RootPackageName} not found on pypi. Skipping package.",
rootPackage.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name);
}

var pythonProject = this.ConvertSimplePypiProjectToSortedDictionary(simplePythonProject, rootPackage);

if (pythonProject.Keys.Any())
{
state.ValidVersionMap[rootPackage.Name] = pythonProject;

// Grab the latest version as our candidate version
Expand All @@ -121,7 +129,7 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
else
{
this.logger.LogWarning(
"Root dependency {RootPackageName} not found on pypi. Skipping package.",
"Unable to resolve package: {RootPackageName} gotten from pypi possibly due to invalid versions. Skipping package.",
rootPackage.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(rootPackage.Name);
}
Expand Down Expand Up @@ -169,9 +177,17 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
// We haven't encountered this package before, so let's fetch it and find a candidate
var newProject = await this.simplePypiClient.GetSimplePypiProjectAsync(dependencyNode);

if (newProject != null && newProject.Files.Any())
if (newProject == null || !newProject.Files.Any())
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
dependencyNode.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}

var result = this.ConvertSimplePypiProjectToSortedDictionary(newProject, dependencyNode);
if (result.Keys.Any())
{
var result = this.ConvertSimplePypiProjectToSortedDictionary(newProject, dependencyNode);
state.ValidVersionMap[dependencyNode.Name] = result;
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;
Expand All @@ -183,7 +199,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
else
{
this.logger.LogWarning(
"Dependency Package {DependencyName} not found in Pypi. Skipping package",
"Unable to resolve dependency package {DependencyName} gotten from pypi possibly due to invalid versions. Skipping package",
dependencyNode.Name);
singleFileComponentRecorder.RegisterPackageParseFailure(dependencyNode.Name);
}
Expand All @@ -202,7 +218,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
/// <returns> Returns a SortedDictionary of PythonProjectReleases. </returns>
private SortedDictionary<string, IList<PythonProjectRelease>> ConvertSimplePypiProjectToSortedDictionary(SimplePypiProject simplePypiProject, PipDependencySpecification spec)
{
var sortedProjectVersions = new SortedDictionary<string, IList<PythonProjectRelease>>();
var sortedProjectVersions = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer());
foreach (var file in simplePypiProject.Files)
{
try
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace Microsoft.ComponentDetection.Orchestrator.Experiments.Configs;

using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Detectors.Pip;

/// <summary>
/// Experiment configuration named "NewPipDetector" that places <see cref="PipComponentDetector"/> in the
/// control group and <see cref="SimplePipComponentDetector"/> in the experiment group.
/// </summary>
public class SimplePipExperiment : IExperimentConfiguration
{
    /// <inheritdoc />
    public string Name
    {
        get { return "NewPipDetector"; }
    }

    /// <inheritdoc />
    public bool IsInControlGroup(IComponentDetector componentDetector)
    {
        return componentDetector is PipComponentDetector;
    }

    /// <inheritdoc />
    public bool IsInExperimentGroup(IComponentDetector componentDetector)
    {
        return componentDetector is SimplePipComponentDetector;
    }

    /// <inheritdoc />
    public bool ShouldRecord(IComponentDetector componentDetector, int numComponents)
    {
        // Always record, regardless of the detector or the number of components.
        return true;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
services.AddSingleton<IExperimentProcessor, DefaultExperimentProcessor>();
services.AddSingleton<IExperimentConfiguration, NewNugetExperiment>();
services.AddSingleton<IExperimentConfiguration, NpmLockfile3Experiment>();
services.AddSingleton<IExperimentConfiguration, SimplePipExperiment>();

// Detectors
// CocoaPods
Expand Down Expand Up @@ -118,6 +119,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
services.AddSingleton<IPythonResolver, PythonResolver>();
services.AddSingleton<ISimplePythonResolver, SimplePythonResolver>();
services.AddSingleton<IComponentDetector, PipComponentDetector>();
services.AddSingleton<IComponentDetector, SimplePipComponentDetector>();

// pnpm
services.AddSingleton<IComponentDetector, PnpmComponentDetector>();
Expand Down
Loading

0 comments on commit 0afc58b

Please sign in to comment.