diff --git a/extractor/filesystem/os/apk/extractor.go b/extractor/filesystem/os/apk/extractor.go index 2c5cad63..ae2ea1bf 100644 --- a/extractor/filesystem/os/apk/extractor.go +++ b/extractor/filesystem/os/apk/extractor.go @@ -18,11 +18,8 @@ package apk import ( "bufio" "context" - "errors" "fmt" - "io" "io/fs" - "net/textproto" "path/filepath" "strings" @@ -129,67 +126,94 @@ func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([] return inventory, err } +// parseSingleApkRecord reads from the scanner a single record, +// returns nil, nil when scanner ends. +func parseSingleApkRecord(scanner *bufio.Scanner) (map[string]string, error) { + // There is currently 26 keys defined here (Under "Installed Database V2"): + // https://wiki.alpinelinux.org/wiki/Apk_spec + group := map[string]string{} + + for scanner.Scan() { + line := scanner.Text() + + if line != "" { + key, val, found := strings.Cut(line, ":") + + if !found { + return nil, fmt.Errorf("invalid line: %q", line) + } + + group[key] = val + continue + } + + // check both that line is empty and we have filled out data in group + // this avoids double empty lines returning early + if line == "" && len(group) > 0 { + // scanner.Err() could only be non nil when Scan() returns false + // so we can return nil directly here + return group, nil + } + } + + return group, scanner.Err() +} + func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) { m, err := osrelease.GetOSRelease(input.FS) if err != nil { log.Errorf("osrelease.ParseOsRelease(): %v", err) } - rd := textproto.NewReader(bufio.NewReader(input.Reader)) - pkgs := []*extractor.Inventory{} + scanner := bufio.NewScanner(input.Reader) + inventories := []*extractor.Inventory{} + for eof := false; !eof; { - // Return if canceled or exceeding deadline. if err := ctx.Err(); err != nil { - return pkgs, fmt.Errorf("%s halted at %q because of context error: %v", e.Name(), input.Path, err) + return nil, fmt.Errorf("%s halted at %q because of context error: %v", e.Name(), input.Path, err) } - h, err := rd.ReadMIMEHeader() + record, err := parseSingleApkRecord(scanner) if err != nil { - if errors.Is(err, io.EOF) { - // We might still have one more line of data - // so return only after it's been parsed. - eof = true - } else { - log.Errorf("Failed to extract all APK packages: %v", err) - return pkgs, nil - } + return nil, fmt.Errorf("error while parsing apk status file %q: %w", input.Path, err) } - pkgName := h.Get("P") - version := h.Get("V") - if pkgName == "" || version == "" { - if !eof { // Expected when reaching the last line. - log.Warnf("APK package name or version is empty (name: %q, version: %q)", pkgName, version) - } - continue + + if len(record) == 0 { + break } - originName := h.Get("o") - maintainer := h.Get("m") - arch := h.Get("A") - license := h.Get("L") - commit := h.Get("c") + var sourceCode *extractor.SourceCodeIdentifier - if commit != "" { + if commit, ok := record["c"]; ok { sourceCode = &extractor.SourceCodeIdentifier{ Commit: commit, } } - pkgs = append(pkgs, &extractor.Inventory{ - Name: pkgName, - Version: version, + + var pkg = &extractor.Inventory{ + Name: record["P"], + Version: record["V"], Metadata: &Metadata{ - PackageName: pkgName, - OriginName: originName, OSID: m["ID"], OSVersionID: m["VERSION_ID"], - Maintainer: maintainer, - Architecture: arch, - License: license, + PackageName: record["P"], + OriginName: record["o"], + Architecture: record["A"], + License: record["L"], + Maintainer: record["m"], }, SourceCode: sourceCode, Locations: []string{input.Path}, - }) + } + + if pkg.Name == "" || pkg.Version == "" { + log.Warnf("APK package name or version is empty (name: %q, version: %q)", pkg.Name, pkg.Version) + continue + } + + inventories = append(inventories, pkg) } - return pkgs, nil + + return inventories, nil } func toNamespace(m *Metadata) string { diff --git a/extractor/filesystem/os/apk/extractor_test.go b/extractor/filesystem/os/apk/extractor_test.go index 0cab5bbe..eab60fe3 100644 --- a/extractor/filesystem/os/apk/extractor_test.go +++ b/extractor/filesystem/os/apk/extractor_test.go @@ -146,21 +146,21 @@ func TestExtract(t *testing.T) { path: "testdata/installed", osrelease: alpine, wantInventory: []*extractor.Inventory{ - getInventory("testdata/installed", "alpine-baselayout", "alpine-baselayout", "3.4.3-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q1zwvKMnYs1b6ZdPTBJ0Z7D5P3jyA="), - getInventory("testdata/installed", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q1YCAH7jdO2W816b85sUh9Z8av4Cc="), - getInventory("testdata/installed", "alpine-keys", "alpine-keys", "2.4-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "MIT", "Q17Do9XvTHoWjQlRYJe7MhnKd8FTQ="), - getInventory("testdata/installed", "apk-tools", "apk-tools", "2.14.0-r0", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q1NpN9vv021bmYzHQp262233VQXes="), - getInventory("testdata/installed", "busybox", "busybox", "1.36.0-r9", "alpine", "3.18.0", "Sören Tempel ", "x86_64", "GPL-2.0-only", "Q13YHCZdGFFJZvgXLCNpZqvnIg/PQ="), - getInventory("testdata/installed", "busybox-binsh", "busybox", "1.36.0-r9", "alpine", "3.18.0", "Sören Tempel ", "x86_64", "GPL-2.0-only", "Q1uXVXJgqIa0rg2YFhdxJ/CSe4zas="), - getInventory("testdata/installed", "ca-certificates-bundle", "ca-certificates", "20230506-r0", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "MPL-2.0 AND MIT", "Q1R/SF0IZwqesh6/EOcK5l3EOrbD0="), - getInventory("testdata/installed", "libc-utils", "libc-dev", "0.7.2-r5", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "BSD-2-Clause AND BSD-3-Clause", "Q1Llna/ri8oHhlQIRsaG8SGug0ikI="), - getInventory("testdata/installed", "libcrypto3", "openssl", "3.1.0-r4", "alpine", "3.18.0", "Ariadne Conill ", "x86_64", "Apache-2.0", "Q1pU2jX0Nb9bzv0BLgQ/1FEelrSbg="), - getInventory("testdata/installed", "libssl3", "openssl", "3.1.0-r4", "alpine", "3.18.0", "Ariadne Conill ", "x86_64", "Apache-2.0", "Q1XjnaA5LrGpHjoyWOR16xY32oW38="), - getInventory("testdata/installed", "musl", "musl", "1.2.4-r0", "alpine", "3.18.0", "Timo Teräs ", "x86_64", "MIT", "Q153m2gOhVOa253ExsSOb33XXh32s="), - getInventory("testdata/installed", "musl-utils", "musl", "1.2.4-r0", "alpine", "3.18.0", "Timo Teräs ", "x86_64", "MIT AND BSD-2-Clause AND GPL-2.0-or-later", "Q16W+GrXf7HQw+k7JuY7ZAIjWrgYk="), - getInventory("testdata/installed", "scanelf", "pax-utils", "1.3.7-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q14nq9o4+uo2NaLbTVDQB3UeooC0M="), - getInventory("testdata/installed", "ssl_client", "busybox", "1.36.0-r9", "alpine", "3.18.0", "Sören Tempel ", "x86_64", "GPL-2.0-only", "Q11TSZ9b3e+tcUKLyjh08V4vkWJYU="), - getInventory("testdata/installed", "zlib", "zlib", "1.2.13-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "Zlib", "Q1JlboSJkrN4qkDcokr4zenpcWEXQ="), + getInventory("testdata/installed", "alpine-baselayout", "alpine-baselayout", "3.4.3-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "65502ca9379dd29d1ac4b0bf0dcf03a3dd1b324a"), + getInventory("testdata/installed", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "65502ca9379dd29d1ac4b0bf0dcf03a3dd1b324a"), + getInventory("testdata/installed", "alpine-keys", "alpine-keys", "2.4-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "MIT", "aab68f8c9ab434a46710de8e12fb3206e2930a59"), + getInventory("testdata/installed", "apk-tools", "apk-tools", "2.14.0-r0", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "028d34f678a5386c3dc488cc3b62467c7a9d1a0b"), + getInventory("testdata/installed", "busybox", "busybox", "1.36.0-r9", "alpine", "3.18.0", "Sören Tempel ", "x86_64", "GPL-2.0-only", "b5c719c244319df3c72ab1f1ee994c2143cab7f0"), + getInventory("testdata/installed", "busybox-binsh", "busybox", "1.36.0-r9", "alpine", "3.18.0", "Sören Tempel ", "x86_64", "GPL-2.0-only", "b5c719c244319df3c72ab1f1ee994c2143cab7f0"), + getInventory("testdata/installed", "ca-certificates-bundle", "ca-certificates", "20230506-r0", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "MPL-2.0 AND MIT", "59534a02716a92a10d177a118c34066162eff4a6"), + getInventory("testdata/installed", "libc-utils", "libc-dev", "0.7.2-r5", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "BSD-2-Clause AND BSD-3-Clause", "988f183cc9d6699930c3e18ccf4a9e36010afb56"), + getInventory("testdata/installed", "libcrypto3", "openssl", "3.1.0-r4", "alpine", "3.18.0", "Ariadne Conill ", "x86_64", "Apache-2.0", "730b75e01c670e3dba5d6c05420b5f605edb6201"), + getInventory("testdata/installed", "libssl3", "openssl", "3.1.0-r4", "alpine", "3.18.0", "Ariadne Conill ", "x86_64", "Apache-2.0", "730b75e01c670e3dba5d6c05420b5f605edb6201"), + getInventory("testdata/installed", "musl", "musl", "1.2.4-r0", "alpine", "3.18.0", "Timo Teräs ", "x86_64", "MIT", "b0d8a9d948174e28a4aefcee4ef6be872225ccce"), + getInventory("testdata/installed", "musl-utils", "musl", "1.2.4-r0", "alpine", "3.18.0", "Timo Teräs ", "x86_64", "MIT AND BSD-2-Clause AND GPL-2.0-or-later", "b0d8a9d948174e28a4aefcee4ef6be872225ccce"), + getInventory("testdata/installed", "scanelf", "pax-utils", "1.3.7-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "GPL-2.0-only", "84a227baf001b6e0208e3352b294e4d7a40e93de"), + getInventory("testdata/installed", "ssl_client", "busybox", "1.36.0-r9", "alpine", "3.18.0", "Sören Tempel ", "x86_64", "GPL-2.0-only", "b5c719c244319df3c72ab1f1ee994c2143cab7f0"), + getInventory("testdata/installed", "zlib", "zlib", "1.2.13-r1", "alpine", "3.18.0", "Natanael Copa ", "x86_64", "Zlib", "84a227baf001b6e0208e3352b294e4d7a40e93de"), }, wantResultMetric: stats.FileExtractedResultSuccess, }, @@ -182,8 +182,9 @@ func TestExtract(t *testing.T) { { name: "invalid", path: "testdata/invalid", - wantInventory: []*extractor.Inventory{}, - wantResultMetric: stats.FileExtractedResultSuccess, + wantInventory: nil, + wantErr: cmpopts.AnyError, + wantResultMetric: stats.FileExtractedResultErrorUnknown, }, { name: "osrelease openwrt", @@ -191,7 +192,7 @@ func TestExtract(t *testing.T) { osrelease: `ID=openwrt VERSION_ID=1.2.3`, wantInventory: []*extractor.Inventory{ - getInventory("testdata/single", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "openwrt", "1.2.3", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q1YCAH7jdO2W816b85sUh9Z8av4Cc="), + getInventory("testdata/single", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "openwrt", "1.2.3", "Natanael Copa ", "x86_64", "GPL-2.0-only", "65502ca9379dd29d1ac4b0bf0dcf03a3dd1b324a"), }, wantResultMetric: stats.FileExtractedResultSuccess, }, @@ -200,7 +201,7 @@ func TestExtract(t *testing.T) { path: "testdata/single", osrelease: "ID=openwrt", wantInventory: []*extractor.Inventory{ - getInventory("testdata/single", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "openwrt", "", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q1YCAH7jdO2W816b85sUh9Z8av4Cc="), + getInventory("testdata/single", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "openwrt", "", "Natanael Copa ", "x86_64", "GPL-2.0-only", "65502ca9379dd29d1ac4b0bf0dcf03a3dd1b324a"), }, wantResultMetric: stats.FileExtractedResultSuccess, }, @@ -209,7 +210,7 @@ func TestExtract(t *testing.T) { path: "testdata/single", osrelease: "", wantInventory: []*extractor.Inventory{ - getInventory("testdata/single", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "", "", "Natanael Copa ", "x86_64", "GPL-2.0-only", "Q1YCAH7jdO2W816b85sUh9Z8av4Cc="), + getInventory("testdata/single", "alpine-baselayout-data", "alpine-baselayout", "3.4.3-r1", "", "", "Natanael Copa ", "x86_64", "GPL-2.0-only", "65502ca9379dd29d1ac4b0bf0dcf03a3dd1b324a"), }, wantResultMetric: stats.FileExtractedResultSuccess, },