Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Update local matcher to the new requirements for resolve #1481

Merged
merged 6 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions cmd/osv-scanner/__snapshots__/main_test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2404,7 +2404,11 @@ Filtered 1 local/unscannable package/s from the scan.
---

[TestRun_LocalDatabases_AlwaysOffline/a_bunch_of_different_lockfiles_and_ecosystem - 2]
could not find local databases for ecosystems: Alpine, Packagist, PyPI, RubyGems, npm
could not load db for PyPI ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for RubyGems ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Alpine ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Packagist ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for npm ecosystem: unable to fetch OSV database: no offline version of the OSV database is available

---

Expand All @@ -2426,7 +2430,11 @@ Filtered 1 local/unscannable package/s from the scan.
---

[TestRun_LocalDatabases_AlwaysOffline/a_bunch_of_different_lockfiles_and_ecosystem - 4]
could not find local databases for ecosystems: Alpine, Packagist, PyPI, RubyGems, npm
could not load db for PyPI ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for RubyGems ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Alpine ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for Packagist ecosystem: unable to fetch OSV database: no offline version of the OSV database is available
could not load db for npm ecosystem: unable to fetch OSV database: no offline version of the OSV database is available

---

Expand Down
41 changes: 18 additions & 23 deletions internal/clients/clientimpl/localmatcher/localmatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import (
"fmt"
"os"
"path"
"slices"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scanner/internal/imodels"
Expand All @@ -26,6 +24,8 @@ type LocalMatcher struct {
dbBasePath string
dbs map[osvschema.Ecosystem]*ZipDB
downloadDB bool
// failedDBs keeps track of the errors when getting databases for each ecosystem
failedDBs map[osvschema.Ecosystem]error
// TODO(v2 logging): Remove this reporter
r reporter.Reporter
}
Expand All @@ -41,15 +41,13 @@ func NewLocalMatcher(r reporter.Reporter, localDBPath string, downloadDB bool) (
dbs: make(map[osvschema.Ecosystem]*ZipDB),
downloadDB: downloadDB,
r: r,
failedDBs: make(map[osvschema.Ecosystem]error),
}, nil
}

func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
func (matcher *LocalMatcher) MatchVulnerabilities(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
results := make([][]*models.Vulnerability, 0, len(invs))

// slice to track ecosystems that did not have an offline database available
var missingDBs []string

for _, inv := range invs {
if ctx.Err() != nil {
return nil, ctx.Err()
Expand All @@ -73,41 +71,38 @@ func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Invent
db, err := matcher.loadDBFromCache(ctx, pkg.Ecosystem)

if err != nil {
if errors.Is(err, ErrOfflineDatabaseNotFound) {
missingDBs = append(missingDBs, string(pkg.Ecosystem.Ecosystem))
} else {
// TODO(V2 logging):
// the most likely error at this point is that the PURL could not be parsed
matcher.r.Errorf("could not load db for %s ecosystem: %v\n", pkg.Ecosystem, err)
}

results = append(results, []*models.Vulnerability{})

continue
}

results = append(results, db.VulnerabilitiesAffectingPackage(pkg))
}

if len(missingDBs) > 0 {
missingDBs = slices.Compact(missingDBs)
slices.Sort(missingDBs)
return results, nil
}

// TODO(v2 logging):
matcher.r.Errorf("could not find local databases for ecosystems: %s\n", strings.Join(missingDBs, ", "))
}
// LoadEcosystem tries to preload the ecosystem into the cache, and returns an error if the ecosystem
// cannot be loaded.
func (matcher *LocalMatcher) LoadEcosystem(ctx context.Context, ecosystem ecosystem.Parsed) error {
_, err := matcher.loadDBFromCache(ctx, ecosystem)

return results, nil
return err
}

func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, ecosystem ecosystem.Parsed) (*ZipDB, error) {
if db, ok := matcher.dbs[ecosystem.Ecosystem]; ok {
return db, nil
}

if matcher.failedDBs[ecosystem.Ecosystem] != nil {
return nil, matcher.failedDBs[ecosystem.Ecosystem]
}

db, err := NewZippedDB(ctx, matcher.dbBasePath, string(ecosystem.Ecosystem), fmt.Sprintf("%s/%s/all.zip", zippedDBRemoteHost, ecosystem.Ecosystem), !matcher.downloadDB)

if err != nil {
matcher.failedDBs[ecosystem.Ecosystem] = err
matcher.r.Errorf("could not load db for %s ecosystem: %v\n", ecosystem.Ecosystem, err)

return nil, err
}

Expand Down
2 changes: 1 addition & 1 deletion internal/clients/clientimpl/osvmatcher/osvmatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type OSVMatcher struct {
InitialQueryTimeout time.Duration
}

func (matcher *OSVMatcher) Match(ctx context.Context, pkgs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
func (matcher *OSVMatcher) MatchVulnerabilities(ctx context.Context, pkgs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
var batchResp *osvdev.BatchedResponse
deadlineExceeded := false

Expand Down
2 changes: 1 addition & 1 deletion internal/clients/clientinterfaces/vulnerabilitymatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ import (
)

type VulnerabilityMatcher interface {
Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error)
MatchVulnerabilities(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error)
}
6 changes: 6 additions & 0 deletions internal/imodels/ecosystem/ecosystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ func MustParse(str string) Parsed {

// Parse parses a string into a constants.Ecosystem and an optional suffix specified with a ":"
func Parse(str string) (Parsed, error) {
// Special case to return an empty ecosystem if str is empty
// This is not considered an error.
if str == "" {
return Parsed{}, nil
}

ecosystem, suffix, _ := strings.Cut(str, ":")

// Always return the full parsed value even if it might be invalid
Expand Down
57 changes: 37 additions & 20 deletions internal/imodels/imodels.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/google/osv-scanner/internal/imodels/ecosystem"
"github.com/google/osv-scanner/internal/scalibrextract/vcs/gitrepo"
"github.com/google/osv-scanner/pkg/models"
"github.com/ossf/osv-schema/bindings/go/osvschema"

scalibrosv "github.com/google/osv-scalibr/extractor/filesystem/osv"
)
Expand Down Expand Up @@ -68,56 +69,72 @@ func FromInventory(inventory *extractor.Inventory) PackageInfo {
Location: inventory.Locations[0],
AdditionalLocations: inventory.Locations[1:],
OriginalInventory: inventory,
// TODO: SourceType
}

// Ignore this error for now as we can't do too much about an unknown ecosystem
// Set Ecosystem
eco, err := ecosystem.Parse(inventory.Ecosystem())
if err != nil {
// Ignore this error for now as we can't do too much about an unknown ecosystem
// TODO(v2): Replace with slog
log.Printf("Warning: %s\n", err.Error())
}

pkgInfo.Ecosystem = eco

// Set Commit and Repository
if inventory.SourceCode != nil {
pkgInfo.Commit = inventory.SourceCode.Commit
pkgInfo.Repository = inventory.SourceCode.Repo
}

// Set DepGroups
if dg, ok := inventory.Metadata.(scalibrosv.DepGroups); ok {
pkgInfo.DepGroups = dg.DepGroups()
}

// Set SourceType
if inventory.Extractor != nil {
extractorName := inventory.Extractor.Name()
if _, ok := osExtractors[extractorName]; ok {
pkgInfo.SourceType = SourceTypeOSPackage
} else if _, ok := sbomExtractors[extractorName]; ok {
pkgInfo.SourceType = SourceTypeSBOM

// TODO (V2): SBOMs have a special case where we manually convert the PURL here
// instead while PURL to ESI conversion is not complete
purl := inventory.Extractor.ToPURL(inventory)

if purl != nil {
// Error should never happen here since the PURL is from an already parsed purl
pi, _ := models.PURLToPackage(purl.String())
pkgInfo.Name = pi.Name
pkgInfo.Version = pi.Version
parsed, err := ecosystem.Parse(pi.Ecosystem)
if err != nil {
// TODO: Replace with slog
log.Printf("Warning, found unexpected ecosystem in purl %q, likely will not return any results for this package.\n", purl.String())
}
pkgInfo.Ecosystem = parsed
}
} else if _, ok := gitExtractors[extractorName]; ok {
pkgInfo.SourceType = SourceTypeGit
} else {
pkgInfo.SourceType = SourceTypeProjectPackage
}
}

// --- General Patching ---

// Scalibr uses go to indicate go compiler version
// We specifically cares about the stdlib version inside the package
// so convert the package name from go to stdlib
if eco.Ecosystem == osvschema.EcosystemGo && inventory.Name == "go" {
pkgInfo.Name = "stdlib"
}

// TODO (V2): SBOMs have a special case where we manually convert the PURL here
// instead while PURL to ESI conversion is not complete
if pkgInfo.SourceType == SourceTypeSBOM {
purl := inventory.Extractor.ToPURL(inventory)
if purl != nil {
// Error should never happen here since the PURL is from an already parsed purl
pi, _ := models.PURLToPackage(purl.String())
pkgInfo.Name = pi.Name
pkgInfo.Version = pi.Version
parsed, err := ecosystem.Parse(pi.Ecosystem)
if err != nil {
// TODO: Replace with slog
log.Printf("Warning, found unexpected ecosystem in purl %q, likely will not return any results for this package.\n", purl.String())
}
pkgInfo.Ecosystem = parsed
}
}

// --- Metadata Patching ---

// Depending on the specific metadata types set fields in package info.
if metadata, ok := inventory.Metadata.(*apk.Metadata); ok {
pkgInfo.OSPackageName = metadata.PackageName
} else if metadata, ok := inventory.Metadata.(*dpkg.Metadata); ok {
Expand Down
2 changes: 1 addition & 1 deletion pkg/osvscanner/osvscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func makeRequestWithMatcher(
invs = append(invs, pkgs.PackageInfo.OriginalInventory)
}

res, err := matcher.Match(context.Background(), invs)
res, err := matcher.MatchVulnerabilities(context.Background(), invs)
if err != nil {
// TODO: Handle error here
r.Errorf("error when retrieving vulns: %v", err)
Expand Down
Loading