Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support use of Maven to resolve all dependencies. #2669

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,14 @@ java:
# this option is helpful for when the parent pom has more data,
# that is not accessible from within the final built artifact
use-network: false
# Enables use of the Maven application to resolve discovered pom.xml files, ensuring that all properties
# are resolved and all configuration is inherited from parent poms.
# When `use-network` is false, Maven runs in offline mode. If a Maven build has been run before
# the scan, all required pom files will already be available locally.
# If Maven is not available, a warning is logged and the unresolved pom.xml is used instead.
use-maven: true
# Command used to run Maven. May include full path.
maven-command: "mvn"

linux-kernel:
# whether to catalog linux kernel modules found within lib/modules/** directories
Expand Down
2 changes: 2 additions & 0 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
},
JavaArchive: java.DefaultArchiveCatalogerConfig().
WithUseNetwork(cfg.Java.UseNetwork).
WithUseMaven(cfg.Java.UseMaven).
WithMavenCommand(cfg.Java.MavenCommand).
WithMavenBaseURL(cfg.Java.MavenURL).
WithArchiveTraversal(archiveSearch, cfg.Java.MaxParentRecursiveDepth),
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/syft/internal/options/java.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package options

// javaConfig holds the user-facing options for the Java cataloger. Its values are
// copied onto the Java archive cataloger configuration by Catalog.ToPackagesConfig.
type javaConfig struct {
// UseNetwork is forwarded to the archive cataloger through WithUseNetwork.
UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"`
// UseMaven enables running the Maven application to resolve discovered pom.xml files.
UseMaven bool `yaml:"use-maven" json:"use-maven" mapstructure:"use-maven"`
// MavenCommand is the command used to run Maven; it may include a full path.
MavenCommand string `yaml:"maven-command" json:"maven-command" mapstructure:"maven-command"`
// MavenURL is forwarded to the archive cataloger through WithMavenBaseURL.
MavenURL string `yaml:"maven-url" json:"maven-url" mapstructure:"maven-url"`
// MaxParentRecursiveDepth is forwarded to the archive cataloger through WithArchiveTraversal.
MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"`
}
2 changes: 1 addition & 1 deletion syft/file/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type LicenseEvidence struct {
func NewLicense(value string) License {
spdxExpression, err := license.ParseExpression(value)
if err != nil {
log.Trace("unable to parse license expression: %s, %w", value, err)
log.Tracef("unable to parse license expression: %s, %w", value, err)
}

return License{
Expand Down
7 changes: 7 additions & 0 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchivePa

// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// Get full path
archiveFilename := string(reader.Reference().RealPath.Normalize())
// Get path to file within the archive
if archiveFilename == "/" {
archiveFilename = reader.AccessPath
}
log.Tracef("Processing Java archive: '%q'", archiveFilename)
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
detectNested: false,
config: ArchiveCatalogerConfig{
UseNetwork: true,
UseMaven: false,
MavenBaseURL: url,
MaxParentRecursiveDepth: 2,
},
Expand Down Expand Up @@ -1337,7 +1338,9 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/org.apache.directory.api/api-asn1-api/pom.xml",
GroupID: "org.apache.directory.api",
ArtifactID: "api-asn1-api",
Version: "2.0.0",
Name: "Apache Directory API ASN.1 API",
Description: "ASN.1 API",
Parent: &pkg.JavaPomParent{
Expand Down
17 changes: 17 additions & 0 deletions syft/pkg/cataloger/java/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package java
import "github.com/anchore/syft/syft/cataloging"

// mavenBaseURL is the Maven repository base URL that DefaultArchiveCatalogerConfig uses for MavenBaseURL.
const mavenBaseURL = "https://repo1.maven.org/maven2"
// mavenCommand is the Maven command name that DefaultArchiveCatalogerConfig uses for MavenCommand.
const mavenCommand = "mvn"

// ArchiveCatalogerConfig configures the Java archive and pom.xml cataloging code in this package.
type ArchiveCatalogerConfig struct {
// ArchiveSearchConfig is embedded and flattened into this struct when (un)marshalled.
cataloging.ArchiveSearchConfig `yaml:",inline" json:"" mapstructure:",squash"`
// UseNetwork, when false, makes generateEffectivePom run Maven with --offline.
UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"`
// UseMaven gates whether parserPomXML tries to produce an effective POM with Maven.
UseMaven bool `yaml:"use-maven" json:"use-maven" mapstructure:"use-maven"`
// MavenCommand is the executable invoked to run Maven.
MavenCommand string `yaml:"maven-command" json:"maven-command" mapstructure:"maven-command"`
// MavenBaseURL is the repository base URL handed to getPomFromMavenRepo.
MavenBaseURL string `yaml:"maven-base-url" json:"maven-base-url" mapstructure:"maven-base-url"`
// MaxParentRecursiveDepth bounds how many parent poms are followed when searching parents.
MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"`
}
Expand All @@ -15,6 +18,8 @@ func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig {
return ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.DefaultArchiveSearchConfig(),
UseNetwork: false,
UseMaven: true,
MavenCommand: mavenCommand,
MavenBaseURL: mavenBaseURL,
MaxParentRecursiveDepth: 5,
}
Expand All @@ -25,6 +30,18 @@ func (j ArchiveCatalogerConfig) WithUseNetwork(input bool) ArchiveCatalogerConfi
return j
}

// WithUseMaven returns a copy of the configuration whose UseMaven flag is set to input.
// The receiver is a value, so the caller's configuration is left untouched.
func (j ArchiveCatalogerConfig) WithUseMaven(input bool) ArchiveCatalogerConfig {
	updated := j
	updated.UseMaven = input
	return updated
}

// WithMavenCommand returns a copy of the configuration that uses input as the Maven command.
// An empty input is ignored, so the command already present on the receiver is kept.
func (j ArchiveCatalogerConfig) WithMavenCommand(input string) ArchiveCatalogerConfig {
	if input == "" {
		return j
	}
	j.MavenCommand = input
	return j
}

func (j ArchiveCatalogerConfig) WithMavenBaseURL(input string) ArchiveCatalogerConfig {
if input != "" {
j.MavenBaseURL = input
Expand Down
7 changes: 7 additions & 0 deletions syft/pkg/cataloger/java/maven_repo_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package java

import (
"context"
"errors"
"fmt"
"io"
"net/http"
Expand Down Expand Up @@ -30,6 +31,8 @@ func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (reque

// An artifact can have its version defined in a parent's DependencyManagement section
func recursivelyFindVersionFromParentPom(ctx context.Context, groupID, artifactID, parentGroupID, parentArtifactID, parentVersion string, cfg ArchiveCatalogerConfig) string {
log.Debugf("recursively finding version from parent Pom for artifact [%v:%v], using parent pom: [%v:%v:%v]",
groupID, artifactID, parentGroupID, parentArtifactID, parentVersion)
// As there can be nested parent poms, we'll recursively check for the version until we reach the max depth
for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
parentPom, err := getPomFromMavenRepo(ctx, parentGroupID, parentArtifactID, parentVersion, cfg.MavenBaseURL)
Expand Down Expand Up @@ -80,7 +83,11 @@ func recursivelyFindLicensesFromParentPom(ctx context.Context, groupID, artifact
}

func getPomFromMavenRepo(ctx context.Context, groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
if len(groupID) == 0 || len(artifactID) == 0 || len(version) == 0 {
return nil, errors.New("missing/incomplete maven artiface coordinates, cannot download pom from repository")
}
requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
log.Tracef("Requesting pom for artifact %s:%s:%s", groupID, artifactID, version)
if err != nil {
return nil, err
}
Expand Down
127 changes: 125 additions & 2 deletions syft/pkg/cataloger/java/parse_pom_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ import (
"encoding/xml"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"reflect"
"regexp"
"strings"
Expand All @@ -23,9 +26,56 @@ import (

// pomXMLGlob is a glob pattern matching any name that ends in "pom.xml".
const pomXMLGlob = "*pom.xml"

// checkedForMaven is set to true by isMavenAvailable once it has probed for Maven.
// NOTE(review): these two flags are plain package variables with no synchronization
// visible here; confirm that isMavenAvailable is never called concurrently.
var checkedForMaven = false
// mavenAvailable caches the outcome of that probe; it only becomes true when the probe succeeds.
var mavenAvailable = false

// propertyMatcher matches "${...}" placeholders: "${", one or more characters other than "}", then "}".
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")

// parserPomXML catalogs the dependencies declared in a pom.xml file.
//
// When gap.cfg.UseMaven is set and Maven can be executed, Maven is asked to write an
// effective POM to "target/effective-pom.xml" next to the pom, and that file is parsed
// instead of the raw pom.xml. If the absolute path cannot be determined or the
// effective POM cannot be opened, the original pom.xml is parsed. In every case the
// location of the original pom.xml is passed along to the package-level parserPomXML.
func (gap genericArchiveParserAdapter) parserPomXML(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	// try to get absolute path first. This fails for tests, so fall back to AccessPath
	pom := reader.AccessPath
	if reader.Reference().RealPath != "" {
		pom = string(reader.Reference().RealPath.Normalize())
	}

	log.Tracef("Found POM in dir: %q", filepath.Dir(pom))

	trueLocation := reader.Location

	if !gap.cfg.UseMaven || !isMavenAvailable(gap.cfg.MavenCommand) {
		log.Debugf("Parsing unresolved POM: %q", pom)
		return parserPomXML(ctx, reader, gap, trueLocation)
	}

	absPathPom, err := filepath.Abs(pom)
	if err != nil {
		// %v, not %w: the wrapping verb is only understood by fmt.Errorf and would
		// otherwise be rendered as "%!w(...)" in the log line.
		log.Errorf("skipping generating effective pom: could not get absolute location of pom file %q : %v", pom, err)
		return parserPomXML(ctx, reader, gap, trueLocation)
	}

	effectivePomFile := filepath.Join(filepath.Dir(absPathPom), "target", "effective-pom.xml")

	generateEffectivePom(pom, effectivePomFile, gap.cfg.MavenCommand, gap.cfg.UseNetwork)

	pomReader, err := os.Open(effectivePomFile)
	if err != nil {
		log.Errorf("Could not open file %q : %v", effectivePomFile, err)
		return parserPomXML(ctx, reader, gap, trueLocation)
	}
	// This handle is opened here rather than supplied by the caller, so it has to be
	// released here; the deferred Close runs only after parsing below has returned.
	defer pomReader.Close()

	log.Debugf("Parsing effective POM: %q", effectivePomFile)

	effectiveReader := file.NewLocationReadCloser(file.NewLocation(effectivePomFile), pomReader)
	return parserPomXML(ctx, effectiveReader, gap, trueLocation)
}

// Parse pom file, when an effective pom file was generated, originalPom points to the original pom file
func parserPomXML(ctx context.Context, reader file.LocationReadCloser, gap genericArchiveParserAdapter, originalPom file.Location) ([]pkg.Package, []artifact.Relationship, error) {
pom, err := decodePomXML(reader)
if err != nil {
return nil, nil, err
Expand All @@ -34,24 +84,77 @@ func (gap genericArchiveParserAdapter) parserPomXML(ctx context.Context, _ file.
var pkgs []pkg.Package
if pom.Dependencies != nil {
for _, dep := range *pom.Dependencies {
var location file.Location
if originalPom.Coordinates != reader.Location.Coordinates {
location = originalPom.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
} else {
location = reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
}

p := newPackageFromPom(
ctx,
pom,
dep,
gap.cfg,
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
location,
)
if p.Name == "" {
continue
}

pkgs = append(pkgs, p)

if len(p.Version) == 0 || strings.HasPrefix(p.Version, "${") {
groupID := *dep.GroupID
artifactID := *dep.ArtifactID
artifact := groupID + ":" + artifactID
log.Infof("Found artifact without version: %q, version: %q", artifact, p.Version)
}
}
}

return pkgs, nil, nil
}

// isMavenAvailable reports whether Maven could be started by running "<mvnCommand> -v".
//
// The command is executed only on the first call; the outcome is stored in the
// package-level mavenAvailable flag and returned by every later call, regardless of
// the mvnCommand passed to those later calls.
func isMavenAvailable(mvnCommand string) bool {
	if checkedForMaven {
		return mavenAvailable
	}

	log.Tracef("Running command: %q -v", mvnCommand)

	if _, probeErr := exec.Command(mvnCommand, "-v").Output(); probeErr != nil {
		log.Warnf("Maven is not available java pom.xml file analysis might be incomplete/incorrect! %+v", probeErr)
	} else {
		log.Trace("Maven is available.")
		mavenAvailable = true
	}
	checkedForMaven = true

	return mavenAvailable
}

// generateEffectivePom runs the Maven goal "help:effective-pom" (non-recursively) for
// pomFile and asks Maven to write the result to effectivePomFile.
//
// mvnCommand is the executable used to start Maven. When useNetwork is false Maven is
// run with --offline. Nothing is returned: a failure to run the command is logged,
// and the caller finds out by trying to open effectivePomFile.
func generateEffectivePom(pomFile string, effectivePomFile string, mvnCommand string, useNetwork bool) {
	log.Debugf("Generating effective POM for: %q", pomFile)

	args := []string{"help:effective-pom", "--non-recursive"}
	if !useNetwork {
		args = append(args, "--offline")
	}
	args = append(args, "-Doutput="+effectivePomFile, "--file", pomFile)

	cmd := exec.Command(mvnCommand, args...) // #nosec G204
	output, err := cmd.Output()
	if err != nil {
		log.Errorf("failed to execute command: %q: %+v", cmd, err)
		log.Debug(string(output))
		// The output has just been logged; return so it is not emitted a second time below.
		return
	}
	log.Trace(string(output))
}

func parsePomXMLProject(path string, reader io.Reader, location file.Location) (*parsedPomProject, error) {
project, err := decodePomXML(reader)
if err != nil {
Expand Down Expand Up @@ -164,6 +267,26 @@ func decodePomXML(content io.Reader) (project gopom.Project, err error) {
return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
}

// For modules groupID and version are almost always inherited from parent pom
if project.GroupID == nil && project.Parent != nil {
project.GroupID = project.Parent.GroupID
}
if project.Version == nil && project.Parent != nil {
project.Version = project.Parent.Version
}

// If missing, add maven built-in version property often used in multi-module projects
if project.Version != nil {
if project.Properties == nil {
var props gopom.Properties
props.Entries = make(map[string]string)
props.Entries["project.version"] = *project.Version
project.Properties = &props
} else {
project.Properties.Entries["project.version"] = *project.Version
}
}

return project, nil
}

Expand Down
Loading
Loading