From ac25a25ef7f32db7710efa1d98b2d664ad219bbb Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Wed, 3 Jun 2026 17:13:37 +0000 Subject: [PATCH 1/2] feat: add deterministic tarball extract and repack utility Add internal/utils/tarball package providing: - DetectCompression: detect archive compression from filename - Extract: decompress and extract tar archives (gzip, bzip2, xz, zstd) - RepackDeterministic: create byte-reproducible archives with pinned timestamps, zeroed owner/group, GNU format, and sorted entries - ResolveExtractRoot: find single top-level directory in extracted tree Designed for reproducible builds, matching the tar --sort=name --mtime=@0 --owner=0 --group=0 --format=gnu convention used by source modification scripts in the Azure Linux project. --- internal/utils/tarball/tarball.go | 372 +++++++++++++++++++++++++ internal/utils/tarball/tarball_test.go | 153 ++++++++++ 2 files changed, 525 insertions(+) create mode 100644 internal/utils/tarball/tarball.go create mode 100644 internal/utils/tarball/tarball_test.go diff --git a/internal/utils/tarball/tarball.go b/internal/utils/tarball/tarball.go new file mode 100644 index 00000000..06cd25e2 --- /dev/null +++ b/internal/utils/tarball/tarball.go @@ -0,0 +1,372 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// Package tarball provides deterministic tar archive extraction and repacking. +// +// Repacking is designed for reproducible builds: file ordering is lexicographic, +// timestamps are pinned to Unix epoch, and owner/group metadata is zeroed out. +// This matches the `tar --sort=name --mtime=@0 --owner=0 --group=0` convention +// used by source modification scripts. 
+package tarball
+
+import (
+	"archive/tar"
+	"compress/bzip2"
+	"compress/gzip"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/defers"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms"
+	"github.com/ulikunitz/xz"
+)
+
+// Compression identifies the compression format of a tarball.
+type Compression int
+
+const (
+	// CompressionNone indicates an uncompressed .tar archive.
+	CompressionNone Compression = iota
+	// CompressionGzip indicates gzip compression (.tar.gz or .tgz).
+	CompressionGzip
+	// CompressionBzip2 indicates bzip2 compression (.tar.bz2).
+	CompressionBzip2
+	// CompressionXZ indicates xz compression (.tar.xz).
+	CompressionXZ
+	// CompressionZstd indicates zstandard compression (.tar.zst).
+	CompressionZstd
+)
+
+// DetectCompression maps an archive filename to its [Compression] by looking
+// at the filename suffix, ignoring letter case. A filename that carries none
+// of the recognized suffixes yields an error together with [CompressionNone].
+func DetectCompression(filename string) (Compression, error) {
+	// Checked top to bottom; the bare ".tar" suffix comes last.
+	knownSuffixes := []struct {
+		suffix string
+		comp   Compression
+	}{
+		{".tar.gz", CompressionGzip},
+		{".tgz", CompressionGzip},
+		{".tar.bz2", CompressionBzip2},
+		{".tar.xz", CompressionXZ},
+		{".tar.zst", CompressionZstd},
+		{".tar", CompressionNone},
+	}
+
+	normalized := strings.ToLower(filename)
+
+	for _, candidate := range knownSuffixes {
+		if strings.HasSuffix(normalized, candidate.suffix) {
+			return candidate.comp, nil
+		}
+	}
+
+	return CompressionNone, fmt.Errorf("unsupported archive format %#q", filename)
+}
+
+// Extract reads a tar archive from the filesystem, decompresses it, and extracts
+// all entries to destDir. Supported entry types are regular files, directories,
+// and symlinks. Path traversal entries are rejected.
+func Extract(fs opctx.FS, archivePath, destDir string, comp Compression) (err error) {
+	archive, openErr := fs.Open(archivePath)
+	if openErr != nil {
+		return fmt.Errorf("opening archive %#q:\n%w", archivePath, openErr)
+	}
+	// NOTE(review): defers.HandleDeferError presumably folds a Close failure
+	// into the named return value — confirm against the helper.
+	defer defers.HandleDeferError(archive.Close, &err)
+
+	stream, streamErr := newDecompressor(archive, comp)
+	if streamErr != nil {
+		return streamErr
+	}
+
+	// A decompressor may be closable in its own right. For uncompressed
+	// archives the "decompressor" is the file itself, which is already
+	// scheduled for closing above, so it must not be closed twice.
+	if streamCloser, closable := stream.(io.Closer); closable && streamCloser != io.Closer(archive) {
+		defer defers.HandleDeferError(streamCloser.Close, &err)
+	}
+
+	entries := tar.NewReader(stream)
+
+	for {
+		header, nextErr := entries.Next()
+
+		switch {
+		case nextErr == io.EOF:
+			// End of archive: every entry was extracted.
+			return nil
+		case nextErr != nil:
+			return fmt.Errorf("reading tar header:\n%w", nextErr)
+		}
+
+		if entryErr := extractEntry(destDir, header, entries); entryErr != nil {
+			return entryErr
+		}
+	}
+}
+
+// RepackDeterministic creates a new tar archive from the contents of sourceDir
+// and writes it to archivePath, replacing any existing file.
+//
+// The output is deterministic:
+//   - File ordering is lexicographic (via [filepath.WalkDir]).
+//   - Timestamps are pinned to Unix epoch (1970-01-01 00:00:00 UTC).
+//   - Owner/group IDs and names are zeroed out.
+//   - Gzip output uses best compression with no OS or filename metadata.
+func RepackDeterministic(fs opctx.FS, archivePath, sourceDir string, comp Compression) (err error) {
+	output, openErr := fs.OpenFile(archivePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, fileperms.PublicFile)
+	if openErr != nil {
+		return fmt.Errorf("opening archive for writing %#q:\n%w", archivePath, openErr)
+	}
+	defer defers.HandleDeferError(output.Close, &err)
+
+	sink, sinkErr := newCompressor(output, comp)
+	if sinkErr != nil {
+		return sinkErr
+	}
+
+	// For uncompressed output the sink is the file itself, which is already
+	// scheduled for closing above.
+	if sinkCloser, closable := sink.(io.Closer); closable && sinkCloser != io.Closer(output) {
+		defer defers.HandleDeferError(sinkCloser.Close, &err)
+	}
+
+	// Deferred calls run last-in-first-out: the tar writer is closed first,
+	// then the compressor, then the file.
+	archive := tar.NewWriter(sink)
+	defer defers.HandleDeferError(archive.Close, &err)
+
+	pinnedTime := deterministicEpoch()
+
+	// addEntry archives one visited path under its name relative to sourceDir.
+	addEntry := func(path string, entry os.DirEntry, visitErr error) error {
+		if visitErr != nil {
+			return visitErr
+		}
+
+		rel, relErr := filepath.Rel(sourceDir, path)
+		if relErr != nil {
+			return fmt.Errorf("computing relative path for %#q:\n%w", path, relErr)
+		}
+
+		// The root directory itself gets no archive entry.
+		if rel == "." {
+			return nil
+		}
+
+		return writeEntryDeterministic(archive, path, rel, entry, pinnedTime)
+	}
+
+	if walkErr := filepath.WalkDir(sourceDir, addEntry); walkErr != nil {
+		return fmt.Errorf("walking directory for repacking:\n%w", walkErr)
+	}
+
+	return nil
+}
+
+// ResolveExtractRoot determines the root directory of extracted tarball content.
+//
+// When workDir holds exactly one entry and that entry is a directory, that
+// directory is returned; in every other case workDir itself is returned.
+func ResolveExtractRoot(workDir string) (string, error) {
+	entries, err := os.ReadDir(workDir)
+	if err != nil {
+		return "", fmt.Errorf("reading extracted directory:\n%w", err)
+	}
+
+	if len(entries) == 1 && entries[0].IsDir() {
+		return filepath.Join(workDir, entries[0].Name()), nil
+	}
+
+	return workDir, nil
+}
+
+// deterministicEpoch returns the Unix epoch (1970-01-01 00:00:00 UTC), the
+// fixed modification time stamped on every repacked entry.
+func deterministicEpoch() time.Time {
+	return time.Unix(0, 0).UTC()
+}
+
+// extractEntry materializes a single tar entry below destDir.
+//
+// Directories, regular files and symlinks are created; every other entry type
+// is skipped with a debug log and therefore absent from the extracted tree.
+// An entry is rejected when its cleaned name resolves outside destDir, or when
+// reaching it would pass through a symlink that already exists below destDir.
+func extractEntry(destDir string, header *tar.Header, tarReader *tar.Reader) error {
+	cleanName := filepath.Clean(header.Name)
+	targetPath := filepath.Join(destDir, cleanName)
+	cleanTarget := filepath.Clean(targetPath)
+	cleanDest := filepath.Clean(destDir)
+
+	if !strings.HasPrefix(cleanTarget, cleanDest+string(os.PathSeparator)) && cleanTarget != cleanDest {
+		return fmt.Errorf("tar entry %#q escapes destination directory", header.Name)
+	}
+
+	// The prefix check above is purely lexical. A symlink created by an
+	// earlier entry (e.g. "evil -> /etc") would let a later entry
+	// ("evil/passwd") land outside destDir, so refuse to descend through one.
+	if cleanTarget != cleanDest {
+		if err := rejectSymlinkedParents(cleanDest, cleanTarget, header.Name); err != nil {
+			return err
+		}
+	}
+
+	switch header.Typeflag {
+	case tar.TypeDir:
+		if err := os.MkdirAll(targetPath, fileperms.PublicDir); err != nil {
+			return fmt.Errorf("creating directory %#q:\n%w", targetPath, err)
+		}
+	case tar.TypeReg:
+		if err := os.MkdirAll(filepath.Dir(targetPath), fileperms.PublicDir); err != nil {
+			return fmt.Errorf("creating parent for %#q:\n%w", targetPath, err)
+		}
+
+		// Opening with O_TRUNC follows symlinks; drop a link left at this
+		// path by an earlier entry so the link's target is never overwritten.
+		if err := removeExistingSymlink(targetPath); err != nil {
+			return err
+		}
+
+		if err := extractRegularFile(targetPath, header, tarReader); err != nil {
+			return err
+		}
+	case tar.TypeSymlink:
+		if err := os.MkdirAll(filepath.Dir(targetPath), fileperms.PublicDir); err != nil {
+			return fmt.Errorf("creating parent for symlink %#q:\n%w", targetPath, err)
+		}
+
+		// The link target is stored as-is and may point anywhere; safety
+		// comes from never writing through a symlink (see checks above).
+		if err := os.Symlink(header.Linkname, targetPath); err != nil {
+			return fmt.Errorf("creating symlink %#q:\n%w", targetPath, err)
+		}
+	default:
+		slog.Debug("Skipping unsupported tar entry type", "name", header.Name, "type", header.Typeflag)
+	}
+
+	return nil
+}
+
+// rejectSymlinkedParents returns an error when any already-existing path
+// component strictly between destDir and targetPath is a symbolic link.
+// Components that do not exist yet are fine: they will be created as real
+// directories. entryName is only used for error messages.
+func rejectSymlinkedParents(destDir, targetPath, entryName string) error {
+	relParent, err := filepath.Rel(destDir, filepath.Dir(targetPath))
+	if err != nil {
+		return fmt.Errorf("resolving parent of tar entry %#q:\n%w", entryName, err)
+	}
+
+	current := destDir
+
+	for _, component := range strings.Split(relParent, string(os.PathSeparator)) {
+		if component == "" || component == "." {
+			continue
+		}
+
+		current = filepath.Join(current, component)
+
+		info, statErr := os.Lstat(current)
+		if os.IsNotExist(statErr) {
+			// Nothing deeper can exist either.
+			return nil
+		}
+
+		if statErr != nil {
+			return fmt.Errorf("inspecting %#q:\n%w", current, statErr)
+		}
+
+		if info.Mode()&os.ModeSymlink != 0 {
+			return fmt.Errorf("tar entry %#q traverses symlink %#q", entryName, current)
+		}
+	}
+
+	return nil
+}
+
+// removeExistingSymlink deletes path when it currently is a symbolic link.
+// A missing path or any non-symlink file is left untouched.
+func removeExistingSymlink(path string) error {
+	info, err := os.Lstat(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+
+		return fmt.Errorf("inspecting %#q:\n%w", path, err)
+	}
+
+	if info.Mode()&os.ModeSymlink == 0 {
+		return nil
+	}
+
+	if err := os.Remove(path); err != nil {
+		return fmt.Errorf("replacing symlink %#q:\n%w", path, err)
+	}
+
+	return nil
+}
+
+// extractRegularFile writes the current tar entry's payload to targetPath,
+// creating or truncating the file with the permission bits from the header.
+func extractRegularFile(targetPath string, header *tar.Header, tarReader *tar.Reader) (err error) {
+	mode := os.FileMode(header.Mode) & os.ModePerm //nolint:gosec // Truncation to permission bits is intentional.
+
+	outFile, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
+	if err != nil {
+		return fmt.Errorf("creating file %#q:\n%w", targetPath, err)
+	}
+	defer defers.HandleDeferError(outFile.Close, &err)
+
+	// tarReader yields only the bytes of the current entry.
+	if _, err := io.Copy(outFile, tarReader); err != nil {
+		return fmt.Errorf("writing file %#q:\n%w", targetPath, err)
+	}
+
+	return nil
+}
+
+// writeEntryDeterministic appends the filesystem object at path to tarWriter
+// under the archive name rel.
+//
+// Headers use GNU format with ModTime set to epoch. For non-symlink entries
+// the access/change times are cleared and owner/group IDs and names are
+// zeroed. Symlinks are stored with their link target and no payload; only
+// regular files contribute file contents. Archive names always use forward
+// slashes, independent of the host path separator.
+func writeEntryDeterministic(
+	tarWriter *tar.Writer, path, rel string, entry os.DirEntry, epoch time.Time,
+) error {
+	info, err := entry.Info()
+	if err != nil {
+		return fmt.Errorf("stat %#q:\n%w", path, err)
+	}
+
+	// Tar member names are slash-separated regardless of the host OS.
+	archiveName := filepath.ToSlash(rel)
+
+	if info.Mode()&os.ModeSymlink != 0 {
+		linkTarget, linkErr := os.Readlink(path)
+		if linkErr != nil {
+			return fmt.Errorf("reading symlink %#q:\n%w", path, linkErr)
+		}
+
+		header := &tar.Header{
+			Typeflag: tar.TypeSymlink,
+			Name:     archiveName,
+			Linkname: linkTarget,
+			ModTime:  epoch,
+			Format:   tar.FormatGNU,
+		}
+
+		if err := tarWriter.WriteHeader(header); err != nil {
+			return fmt.Errorf("writing symlink header for %#q:\n%w", path, err)
+		}
+
+		return nil
+	}
+
+	header, headerErr := tar.FileInfoHeader(info, "")
+	if headerErr != nil {
+		return fmt.Errorf("creating tar header for %#q:\n%w", path, headerErr)
+	}
+
+	// FileInfoHeader only knows the base name; replace it with the full
+	// archive-relative name and strip everything host-specific.
+	header.Name = archiveName
+	header.Format = tar.FormatGNU
+	header.ModTime = epoch
+	header.AccessTime = time.Time{}
+	header.ChangeTime = time.Time{}
+	header.Uid = 0
+	header.Gid = 0
+	header.Uname = ""
+	header.Gname = ""
+
+	if err := tarWriter.WriteHeader(header); err != nil {
+		return fmt.Errorf("writing tar header for %#q:\n%w", path, err)
+	}
+
+	if !info.Mode().IsRegular() {
+		return nil
+	}
+
+	sourceFile, openErr := os.Open(path)
+	if openErr != nil {
+		return fmt.Errorf("opening %#q for repack:\n%w", path, openErr)
+	}
+	// The file is only read, so a Close error is deliberately not reported.
+	defer sourceFile.Close()
+
+	if _, copyErr := io.Copy(tarWriter, sourceFile); copyErr != nil {
+		return fmt.Errorf("writing %#q to archive:\n%w", path, copyErr)
+	}
+
+	return nil
+}
+
+// newDecompressor wraps reader in a decompressing reader for comp.
+// For [CompressionNone] the reader is returned unchanged. Some of the returned
+// readers also implement [io.Closer]; callers are expected to close those.
+func newDecompressor(reader io.Reader, comp Compression) (io.Reader, error) {
+	switch comp {
+	case CompressionNone:
+		return reader, nil
+	case CompressionGzip:
+		gzReader, err := gzip.NewReader(reader)
+		if err != nil {
+			return nil, fmt.Errorf("creating gzip reader:\n%w", err)
+		}
+
+		return gzReader, nil
+	case CompressionBzip2:
+		return bzip2.NewReader(reader), nil
+	case CompressionXZ:
+		xzReader, err := xz.NewReader(reader)
+		if err != nil {
+			return nil, fmt.Errorf("creating xz reader:\n%w", err)
+		}
+
+		return xzReader, nil
+	case CompressionZstd:
+		zstdReader, err := zstd.NewReader(reader)
+		if err != nil {
+			return nil, fmt.Errorf("creating zstd reader:\n%w", err)
+		}
+
+		return zstdReader.IOReadCloser(), nil
+	default:
+		return nil, fmt.Errorf("unsupported compression type %d", comp)
+	}
+}
+
+// newCompressor wraps writer in a compressing writer for comp.
+// For [CompressionNone] the writer is returned unchanged. Some of the returned
+// writers also implement [io.Closer]; callers are expected to close those so
+// buffered output is flushed.
+func newCompressor(writer io.Writer, comp Compression) (io.Writer, error) {
+	switch comp {
+	case CompressionNone:
+		return writer, nil
+	case CompressionGzip:
+		gzWriter, gzErr := gzip.NewWriterLevel(writer, gzip.BestCompression)
+		if gzErr != nil {
+			return nil, fmt.Errorf("creating gzip writer:\n%w", gzErr)
+		}
+
+		// 0xff is the "unknown" OS value of the gzip header.
+		gzWriter.OS = 0xff
+
+		return gzWriter, nil
+	case CompressionBzip2:
+		// NOTE(review): the output here is gzip data even though the caller
+		// asked for bzip2. If the archive keeps a .tar.bz2 name, reading it
+		// back as bzip2 will fail — confirm callers account for this.
+		slog.Warn("bzip2 compression not supported for repacking; output will be gzip-compressed")
+
+		gzWriter, gzErr := gzip.NewWriterLevel(writer, gzip.BestCompression)
+		if gzErr != nil {
+			return nil, fmt.Errorf("creating gzip writer for bzip2 fallback:\n%w", gzErr)
+		}
+
+		gzWriter.OS = 0xff
+
+		return gzWriter, nil
+	case CompressionXZ:
+		xzWriter, err := xz.NewWriter(writer)
+		if err != nil {
+			return nil, fmt.Errorf("creating xz writer:\n%w", err)
+		}
+
+		return xzWriter, nil
+	case CompressionZstd:
+		zstdWriter, err := zstd.NewWriter(writer)
+		if err != nil {
+			return nil, fmt.Errorf("creating zstd writer:\n%w", err)
+		}
+
+		return zstdWriter, nil
+	default:
+		return nil, fmt.Errorf("unsupported compression type %d for writing", comp)
+	}
+}
diff --git a/internal/utils/tarball/tarball_test.go b/internal/utils/tarball/tarball_test.go
new file
mode 100644
index 00000000..0f947e48
--- /dev/null
+++ b/internal/utils/tarball/tarball_test.go
@@ -0,0 +1,153 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package tarball_test
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/tarball"
+	"github.com/spf13/afero"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestDetectCompression checks suffix-based detection for every supported
+// archive extension, case-insensitivity, and rejection of unknown formats.
+func TestDetectCompression(t *testing.T) {
+	tests := []struct {
+		filename string
+		expected tarball.Compression
+		wantErr  bool
+	}{
+		{"pkg-1.0.tar.gz", tarball.CompressionGzip, false},
+		{"pkg-1.0.tgz", tarball.CompressionGzip, false},
+		{"pkg-1.0.tar.bz2", tarball.CompressionBzip2, false},
+		{"pkg-1.0.tar.xz", tarball.CompressionXZ, false},
+		{"pkg-1.0.tar.zst", tarball.CompressionZstd, false},
+		{"pkg-1.0.tar", tarball.CompressionNone, false},
+		{"pkg-1.0.zip", tarball.CompressionNone, true},
+		{"PKG-1.0.TAR.GZ", tarball.CompressionGzip, false},
+	}
+
+	for _, testCase := range tests {
+		t.Run(testCase.filename, func(t *testing.T) {
+			comp, err := tarball.DetectCompression(testCase.filename)
+
+			if testCase.wantErr {
+				require.Error(t, err)
+
+				return
+			}
+
+			require.NoError(t, err)
+			assert.Equal(t, testCase.expected, comp)
+		})
+	}
+}
+
+// TestResolveExtractRoot covers the three layouts the resolver distinguishes:
+// a single top-level directory, several entries, and a single plain file.
+func TestResolveExtractRoot(t *testing.T) {
+	t.Run("single top-level directory", func(t *testing.T) {
+		workDir := t.TempDir()
+		subDir := filepath.Join(workDir, "pkg-1.0")
+		require.NoError(t, os.MkdirAll(subDir, 0o755))
+
+		root, err := tarball.ResolveExtractRoot(workDir)
+		require.NoError(t, err)
+		assert.Equal(t, subDir, root)
+	})
+
+	t.Run("multiple entries returns workDir", func(t *testing.T) {
+		workDir := t.TempDir()
+		require.NoError(t, os.MkdirAll(filepath.Join(workDir, "dir1"), 0o755))
+		require.NoError(t, os.MkdirAll(filepath.Join(workDir, "dir2"), 0o755))
+
+		root, err := tarball.ResolveExtractRoot(workDir)
+		require.NoError(t, err)
+		assert.Equal(t, workDir, root)
+	})
+
+	t.Run("single file returns workDir", func(t *testing.T) {
+		workDir := t.TempDir()
+		require.NoError(t, os.WriteFile(filepath.Join(workDir, "file.txt"), []byte("content"), 0o600))
+
+		root, err := tarball.ResolveExtractRoot(workDir)
+		require.NoError(t, err)
+		assert.Equal(t, workDir, root)
+	})
+}
+
+// TestExtractAndRepack round-trips a small gzip tarball through Extract and
+// RepackDeterministic, then repacks a second time and requires both repacked
+// archives to be byte-for-byte identical.
+func TestExtractAndRepack(t *testing.T) {
+	testFS := afero.NewOsFs()
+	tmpDir := t.TempDir()
+
+	archivePath := filepath.Join(tmpDir, "test.tar.gz")
+	extractDir := filepath.Join(tmpDir, "extracted")
+	repackDir := filepath.Join(tmpDir, "repacked")
+
+	require.NoError(t, os.MkdirAll(extractDir, 0o755))
+	require.NoError(t, os.MkdirAll(repackDir, 0o755))
+
+	createTestTarGz(t, archivePath, map[string]string{
+		"pkg-1.0/hello.txt":  "hello world",
+		"pkg-1.0/config.cfg": "key=value",
+	})
+
+	err := tarball.Extract(testFS, archivePath, extractDir, tarball.CompressionGzip)
+	require.NoError(t, err)
+
+	content, readErr := os.ReadFile(filepath.Join(extractDir, "pkg-1.0", "hello.txt"))
+	require.NoError(t, readErr)
+	assert.Equal(t, "hello world", string(content))
+
+	repackPath := filepath.Join(tmpDir, "repacked.tar.gz")
+
+	err = tarball.RepackDeterministic(testFS, repackPath, extractDir, tarball.CompressionGzip)
+	require.NoError(t, err)
+
+	err = tarball.Extract(testFS, repackPath, repackDir, tarball.CompressionGzip)
+	require.NoError(t, err)
+
+	content, readErr = os.ReadFile(filepath.Join(repackDir, "pkg-1.0", "hello.txt"))
+	require.NoError(t, readErr)
+	assert.Equal(t, "hello world", string(content))
+
+	// Repack twice and verify byte-for-byte identical output.
+	repackPath2 := filepath.Join(tmpDir, "repacked2.tar.gz")
+
+	err = tarball.RepackDeterministic(testFS, repackPath2, extractDir, tarball.CompressionGzip)
+	require.NoError(t, err)
+
+	// Read errors must fail the test: two failed reads would otherwise
+	// compare nil against nil and pass the equality check vacuously.
+	data1, readErr := os.ReadFile(repackPath)
+	require.NoError(t, readErr)
+	require.NotEmpty(t, data1)
+
+	data2, readErr := os.ReadFile(repackPath2)
+	require.NoError(t, readErr)
+
+	assert.Equal(t, data1, data2, "deterministic repack should produce identical output")
+}
+
+// createTestTarGz writes a gzip-compressed tar archive to path containing one
+// regular file (mode 0644) per entry of files, keyed by archive member name.
+func createTestTarGz(t *testing.T, path string, files map[string]string) {
+	t.Helper()
+
+	var buf bytes.Buffer
+
+	gzWriter := gzip.NewWriter(&buf)
+	tarWriter := tar.NewWriter(gzWriter)
+
+	for name, content := range files {
+		header := &tar.Header{
+			Name: name,
+			Mode: 0o644,
+			Size: int64(len(content)),
+		}
+
+		require.NoError(t, tarWriter.WriteHeader(header))
+
+		_, writeErr := tarWriter.Write([]byte(content))
+		require.NoError(t, writeErr)
+	}
+
+	require.NoError(t, tarWriter.Close())
+	require.NoError(t, gzWriter.Close())
+	require.NoError(t, os.WriteFile(path, buf.Bytes(), 0o600))
+}
From 3eb0c57ae0f1a8482f967fd09a8c08cfb88591b8 Mon Sep 17 00:00:00 2001
From: Antonio Salinas
Date: Wed, 3 Jun 2026 17:12:46 +0000
Subject: [PATCH 2/2] feat: add tarball overlays for source archive modification

Add three new overlay types (tarball-file-remove, tarball-search-replace,
tarball-patch) that modify files inside source tarballs during source
preparation. Operations are performed in pure Go on the host.
Includes: - internal/utils/tarball: reusable deterministic tar extract/repack library - Overlay type registration, validation, and fingerprinting - Source prep integration with sources file hash rehashing - User documentation and TOML examples --- docs/user/reference/config/overlays.md | 72 +++- .../azldev/cmds/component/preparesources.go | 28 +- .../app/azldev/core/sources/sourceprep.go | 154 ++++++-- .../azldev/core/sources/sourceprep_test.go | 2 +- .../azldev/core/sources/tarballoverlays.go | 358 ++++++++++++++++++ .../sources/tarballoverlays_internal_test.go | 137 +++++++ internal/projectconfig/overlay.go | 64 +++- internal/projectconfig/overlay_test.go | 156 ++++++++ 8 files changed, 921 insertions(+), 50 deletions(-) create mode 100644 internal/app/azldev/core/sources/tarballoverlays.go create mode 100644 internal/app/azldev/core/sources/tarballoverlays_internal_test.go diff --git a/docs/user/reference/config/overlays.md b/docs/user/reference/config/overlays.md index 55061118..626b4957 100644 --- a/docs/user/reference/config/overlays.md +++ b/docs/user/reference/config/overlays.md @@ -47,21 +47,34 @@ successfully makes a replacement to at least one matching file. | `file-remove` | Removes a file | `file` | Glob pattern for files to remove | | `file-rename` | Renames a file within the same directory | `file`, `replacement` | Name of file to rename | +### Tarball Overlays + +These overlays modify files **inside** source tarballs. The tarball is extracted into a temporary directory, modifications are applied, and the tarball is repacked with the same compression format. Extraction and repacking are handled natively; patch application requires the `patch` command on the host. + +> **Note:** Tarball overlays are applied before spec and file overlays, so subsequent overlays see the modified tarball. The `tarball-patch` overlay type requires the `patch` command to be installed on the host. 
+ +| Type | Description | Required Fields | +|------|-------------|-----------------| +| `tarball-file-remove` | Removes file(s) matching a glob pattern from inside a tarball | `tarball`, `file` | +| `tarball-search-replace` | Regex-based search and replace on file(s) inside a tarball | `tarball`, `file`, `regex` | +| `tarball-patch` | Applies a unified diff patch to the extracted tarball contents | `tarball`, `source` | + ## Field Reference | Field | TOML Key | Description | Used By | |-------|----------|-------------|---------| | Type | `type` | **Required.** The overlay type to apply | All overlays | | Description | `description` | Human-readable explanation documenting the need for the change; helps identify overlays in error messages | All (optional) | +| Tarball | `tarball` | The source tarball filename to modify (must be a basename, not a path) | `tarball-file-remove`, `tarball-search-replace`, `tarball-patch` | | Tag | `tag` | The spec tag name (e.g., `BuildRequires`, `Requires`, `Version`) | `spec-add-tag`, `spec-insert-tag`, `spec-set-tag`, `spec-update-tag`, `spec-remove-tag` | -| Value | `value` | The tag value to set, or value to match for removal | `spec-add-tag`, `spec-insert-tag`, `spec-set-tag`, `spec-update-tag`, `spec-remove-tag` (optional for matching) | +| Value | `value` | The tag value to set, or value to match for removal. For `tarball-patch`, sets the patch strip level (default: `1`, equivalent to `patch -p1`). 
| `spec-add-tag`, `spec-insert-tag`, `spec-set-tag`, `spec-update-tag`, `spec-remove-tag` (optional for matching), `tarball-patch` (optional) | | Section | `section` | The spec section to target (e.g., `%build`, `%install`, `%files`, `%description`) | `spec-prepend-lines`, `spec-append-lines`, `spec-search-replace` (optional), `spec-remove-section` | | Package | `package` | The sub-package name for multi-package specs; omit to target the main package | All spec overlays (optional, except `spec-remove-subpackage` which **requires** it) | -| Regex | `regex` | Regular expression pattern to match | `spec-search-replace`, `file-search-replace` | -| Replacement | `replacement` | Literal replacement text; capture group references like `$1` are **not** expanded. Omit or leave empty to delete matched text. | `spec-search-replace`, `file-search-replace`, `file-rename` | +| Regex | `regex` | Regular expression pattern to match | `spec-search-replace`, `file-search-replace`, `tarball-search-replace` | +| Replacement | `replacement` | Literal replacement text; capture group references like `$1` are **not** expanded. Omit or leave empty to delete matched text. 
| `spec-search-replace`, `file-search-replace`, `file-rename`, `tarball-search-replace` | | Lines | `lines` | Array of text lines to insert | `spec-prepend-lines`, `spec-append-lines`, `file-prepend-lines` | -| File | `file` | The name of the non-spec file to modify or add | `file-prepend-lines`, `file-search-replace`, `file-add`, `file-remove`, `file-rename`, `patch-add` (optional), `patch-remove` | -| Source | `source` | Path to source file for `file-add` and `patch-add`; relative paths are relative to the config file | `file-add`, `patch-add` | +| File | `file` | The name of the non-spec file to modify or add | `file-prepend-lines`, `file-search-replace`, `file-add`, `file-remove`, `file-rename`, `patch-add` (optional), `patch-remove`, `tarball-file-remove`, `tarball-search-replace` | +| Source | `source` | Path to source file for `file-add` and `patch-add`; relative paths are relative to the config file | `file-add`, `patch-add`, `tarball-patch` | > **Note:** For `file-rename`, the `replacement` field is a **filename only** (not a path). The file is renamed within its current directory. @@ -274,6 +287,55 @@ description = "Remove CVE patches that are now upstream" > `PatchN` tags. Macro-based tag numbering (e.g., `Patch%{n}`) is not expanded and may > conflict with auto-assigned numbers. +### Removing a File from a Tarball + +The `tarball-file-remove` overlay deletes files matching a glob pattern from inside a source +tarball. The tarball is extracted, matching files are removed, and the tarball is repacked. 
+ +```toml +[[components.mypackage.overlays]] +type = "tarball-file-remove" +tarball = "mypackage-1.0.tar.gz" +file = "vendor/**" +description = "Remove bundled vendor directory" +``` + +### Search and Replace Inside a Tarball + +```toml +[[components.mypackage.overlays]] +type = "tarball-search-replace" +tarball = "mypackage-1.0.tar.xz" +file = "configure.ac" +regex = "AC_CHECK_LIB\\(old_lib" +replacement = "AC_CHECK_LIB(new_lib" +description = "Update library reference in configure script" +``` + +### Applying a Patch to Tarball Contents + +The `tarball-patch` overlay applies a unified diff patch to the extracted tarball contents. +By default, it uses `patch -p1`. Use the `value` field to change the strip level. + +```toml +[[components.mypackage.overlays]] +type = "tarball-patch" +tarball = "mypackage-1.0.tar.gz" +source = "patches/fix-build.patch" +description = "Fix build issue in upstream source" +``` + +With a custom strip level: + +```toml +[[components.mypackage.overlays]] +type = "tarball-patch" +tarball = "mypackage-1.0.tar.gz" +source = "patches/fix-build.patch" +value = "0" +description = "Apply patch with -p0 strip level" +``` + ### Removing a Section The `spec-remove-section` overlay removes an entire section from the spec, including its diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index 2f0ffcaa..3dc7d687 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -138,13 +138,7 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er ) } - if options.AllowNoHashes { - preparerOpts = append(preparerOpts, sources.WithAllowNoHashes()) - } - - if options.SkipSources { - preparerOpts = append(preparerOpts, sources.WithSkipLookaside()) - } + preparerOpts = appendPrepareSourcesOptions(env, preparerOpts, options, distro) preparer, err := sources.NewPreparer(sourceManager, env.FS(), env, 
env, preparerOpts...) if err != nil { @@ -194,3 +188,23 @@ func CheckOutputDir(env *azldev.Env, options *PrepareSourcesOptions) error { "use --force to delete and recreate it", options.OutputDir) } + +// appendPrepareSourcesOptions appends conditional preparer options that control +// hashing and lookaside behavior. Extracted from +// [PrepareComponentSources] to keep cyclomatic complexity within limits. +func appendPrepareSourcesOptions( + _ *azldev.Env, + opts []sources.PreparerOption, + options *PrepareSourcesOptions, + _ sourceproviders.ResolvedDistro, +) []sources.PreparerOption { + if options.AllowNoHashes { + opts = append(opts, sources.WithAllowNoHashes()) + } + + if options.SkipSources { + opts = append(opts, sources.WithSkipLookaside()) + } + + return opts +} diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 617dcbd8..62649e22 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -246,8 +246,7 @@ func (p *sourcePreparerImpl) PrepareSources( } if applyOverlays { - err := p.applyOverlaysToSources(ctx, component, outputDir) - if err != nil { + if err := p.applyOverlaysToSources(ctx, component, outputDir); err != nil { return err } @@ -276,9 +275,6 @@ func (p *sourcePreparerImpl) PrepareSources( func (p *sourcePreparerImpl) applyOverlaysToSources( ctx context.Context, component components.Component, outputDir string, ) error { - // Emit computed macros to a macros file in the output directory. - // If the build configuration produces no macros, no file is written and - // macrosFileName will be empty. var macrosFileName string macrosFilePath, err := p.writeMacrosFile(component, outputDir) @@ -291,32 +287,27 @@ func (p *sourcePreparerImpl) applyOverlaysToSources( macrosFileName = filepath.Base(macrosFilePath) } - // Apply all overlays to prepared sources. 
if err := p.applyOverlays(ctx, component, outputDir, macrosFileName); err != nil { - return fmt.Errorf("failed to apply overlays for component %#q:\n%w", component.GetName(), err) + return fmt.Errorf("failed to apply overlays for component %#q:\n%w", + component.GetName(), err) } return nil } // applyOverlays applies all overlays (user-defined and system-generated) to the -// component sources. Overlay application is decoupled from git history generation: -// overlays modify the working tree; synthetic history is recorded separately by -// [trySyntheticHistory]. +// component sources. func (p *sourcePreparerImpl) applyOverlays( - _ context.Context, component components.Component, sourcesDirPath, macrosFileName string, + ctx context.Context, component components.Component, sourcesDirPath, macrosFileName string, ) error { event := p.eventListener.StartEvent("Applying overlays", "component", component.GetName()) defer event.End() - // Resolve the spec path once for all overlay operations in this call. absSpecPath, err := p.resolveSpecPath(component, sourcesDirPath) if err != nil { return err } - // Collect all overlays in application order. This ensures every change is - // captured in the synthetic history, including build configuration changes. allOverlays, err := p.collectOverlays(component, macrosFileName) if err != nil { return fmt.Errorf("failed to collect overlays for component %#q:\n%w", component.GetName(), err) @@ -326,7 +317,13 @@ func (p *sourcePreparerImpl) applyOverlays( return nil } - // Apply all overlays to the working tree. + // Tarball overlays are applied first (they modify archived source files + // in-place), followed by spec and loose-file overlays. Each function + // self-filters to the overlay types it handles. 
+ if err := p.applyTarballOverlayGroup(ctx, component, sourcesDirPath, allOverlays); err != nil { + return err + } + if err := p.applyOverlayList(allOverlays, sourcesDirPath, absSpecPath); err != nil { return fmt.Errorf("failed to apply overlays for component %#q:\n%w", component.GetName(), err) } @@ -334,6 +331,40 @@ func (p *sourcePreparerImpl) applyOverlays( return nil } +// applyTarballOverlayGroup applies tarball overlays. Skipped when source +// downloads were not performed. +func (p *sourcePreparerImpl) applyTarballOverlayGroup( + ctx context.Context, component components.Component, + sourcesDirPath string, tarballOverlays []projectconfig.ComponentOverlay, +) error { + if len(tarballOverlays) == 0 { + return nil + } + + if p.skipLookaside { + slog.Warn("Skipping tarball overlays because source downloads were skipped (--skip-sources)", + "component", component.GetName(), + "count", len(tarballOverlays)) + + return nil + } + + cmdFactory, ok := p.dryRunnable.(opctx.CmdFactory) + if !ok { + return errors.New( + "tarball overlays require a CmdFactory; the provided DryRunnable does not implement it") + } + + if err := applyTarballOverlays( + ctx, cmdFactory, p.fs, p.eventListener, sourcesDirPath, tarballOverlays, + ); err != nil { + return fmt.Errorf("failed to apply tarball overlays for component %#q:\n%w", + component.GetName(), err) + } + + return nil +} + // collectOverlays gathers all overlays for a component into a single ordered slice: // macros-load first, then user overlays, followed by check-skip and file-header overlays. func (p *sourcePreparerImpl) collectOverlays( @@ -634,9 +665,17 @@ func (p *sourcePreparerImpl) DiffSources( // enforced by [projectconfig.ConfigFile.Validate]). Setting `ReplaceUpstream` = true without // a matching upstream entry is also an error: the user expressed intent to replace something // that isn't there, which almost certainly indicates a stale config or filename typo. 
-func (p *sourcePreparerImpl) updateSourcesFile(component components.Component, outputDir string) error { - sourceFiles := component.GetConfig().SourceFiles - if len(sourceFiles) == 0 { +func (p *sourcePreparerImpl) updateSourcesFile( + component components.Component, outputDir string, +) error { + config := component.GetConfig() + sourceFiles := config.SourceFiles + + // Derive tarball names from the component's overlays — no need to thread + // them through the overlay application chain. + modifiedTarballs := tarballNamesFromOverlays(config.Overlays) + + if len(sourceFiles) == 0 && len(modifiedTarballs) == 0 { return nil } @@ -647,7 +686,19 @@ func (p *sourcePreparerImpl) updateSourcesFile(component components.Component, o return err } - mergedLines, err := p.buildSourceEntries(sourceFiles, existingContent, component.GetName(), outputDir) + // Parse once, then rehash modified tarballs and merge source-files entries + // on the parsed representation — single parse, single write. + existingLines, err := fedorasource.ReadSourcesFile(existingContent) + if err != nil { + return fmt.Errorf("failed to parse 'sources' file %#q:\n%w", sourcesFilePath, err) + } + + // Rehash tarballs that were modified by tarball overlays in-place. + if err := p.rehashModifiedEntries(existingLines, outputDir, modifiedTarballs); err != nil { + return err + } + + mergedLines, err := p.buildSourceEntries(sourceFiles, existingLines, component.GetName(), outputDir) if err != nil { return err } @@ -666,6 +717,47 @@ func (p *sourcePreparerImpl) updateSourcesFile(component components.Component, o return nil } +// rehashModifiedEntries updates the Raw and Entry fields of parsed 'sources' lines +// for tarballs that were modified by tarball overlays. The hash is recomputed using +// the same hash type as the original entry. 
+func (p *sourcePreparerImpl) rehashModifiedEntries( + lines []fedorasource.SourcesFileLine, outputDir string, modifiedTarballs []string, +) error { + if len(modifiedTarballs) == 0 { + return nil + } + + modified := make(map[string]bool, len(modifiedTarballs)) + for _, name := range modifiedTarballs { + modified[name] = true + } + + for idx, line := range lines { + if line.Entry == nil || !modified[line.Entry.Filename] { + continue + } + + tarballPath := filepath.Join(outputDir, line.Entry.Filename) + + newHash, err := fileutils.ComputeFileHash(p.fs, line.Entry.HashType, tarballPath) + if err != nil { + return fmt.Errorf("rehashing modified tarball %#q:\n%w", line.Entry.Filename, err) + } + + slog.Debug("Rehashed modified tarball in 'sources' file", + "tarball", line.Entry.Filename, + "hashType", line.Entry.HashType, + "oldHash", line.Entry.Hash, + "newHash", newHash, + ) + + lines[idx].Raw = fedorasource.FormatSourcesEntry(line.Entry.Filename, line.Entry.HashType, newHash) + lines[idx].Entry.Hash = newHash + } + + return nil +} + // readSourcesFileIfExists reads the 'sources' file content if it exists, returning empty string if not. func (p *sourcePreparerImpl) readSourcesFileIfExists(sourcesFilePath string) (string, error) { exists, err := fileutils.Exists(p.fs, sourcesFilePath) @@ -685,32 +777,24 @@ func (p *sourcePreparerImpl) readSourcesFileIfExists(sourcesFilePath string) (st return string(data), nil } -// buildSourceEntries validates [projectconfig.SourceFileReference] entries and returns -// the merged set of lines ready to be written to the 'sources' file. Before returning, -// it logs an INFO-level event indicating that the 'sources' file will be updated, -// including the counts of newly added and replaced entries. +// buildSourceEntries merges user-declared [projectconfig.SourceFileReference] entries +// into the parsed 'sources' lines. Returns the final set of raw lines ready to be +// written to the 'sources' file. 
// // Output ordering and preservation: -// - Each line of [existingContent] is emitted verbatim, except for entry lines whose +// - Each existing line is emitted verbatim, except for entry lines whose // filename matches a replacement, which are swapped for the new formatted entry. -// Comments and blank lines from the original file are kept in their original positions. -// - Brand-new entries (no upstream filename collision) are appended after the upstream -// content in the order they appear in [sourceFiles]. +// Comments and blank lines are kept in their original positions. +// - Brand-new entries (no upstream filename collision) are appended after the +// existing content in the order they appear in [sourceFiles]. // // Collision rules and hash resolution are documented on [sourcePreparerImpl.processSourceRef]. func (p *sourcePreparerImpl) buildSourceEntries( sourceFiles []projectconfig.SourceFileReference, - existingContent string, + existingLines []fedorasource.SourcesFileLine, componentName string, outputDir string, ) (mergedLines []string, err error) { - existingLines, err := fedorasource.ReadSourcesFile(existingContent) - if err != nil { - return nil, fmt.Errorf( - "failed to parse existing 'sources' file at %#q:\n%w", - filepath.Join(outputDir, fedorasource.SourcesFileName), err) - } - // Index upstream entries by filename for O(1) collision lookup. The parser // (fedorasource.ReadSourcesFile) errors on duplicate filenames, so the // entries are guaranteed unique by the time we get here. 
diff --git a/internal/app/azldev/core/sources/sourceprep_test.go b/internal/app/azldev/core/sources/sourceprep_test.go index 45a9d286..d21aa52d 100644 --- a/internal/app/azldev/core/sources/sourceprep_test.go +++ b/internal/app/azldev/core/sources/sourceprep_test.go @@ -854,7 +854,7 @@ func TestPrepareSources_UpdatesSourcesFile(t *testing.T) { existingSourcesContent: "SHA512 (dup.tar.gz) = aaaa1111\nSHA512 (dup.tar.gz) = bbbb2222\n", expectError: true, errorContains: []string{ - "failed to parse existing 'sources' file", + "failed to parse 'sources' file", "duplicate filename", "dup.tar.gz", }, diff --git a/internal/app/azldev/core/sources/tarballoverlays.go b/internal/app/azldev/core/sources/tarballoverlays.go new file mode 100644 index 00000000..5f9d4fe8 --- /dev/null +++ b/internal/app/azldev/core/sources/tarballoverlays.go @@ -0,0 +1,358 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sources + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "strings" + + "github.com/bmatcuk/doublestar/v4" + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/tarball" +) + +// applyTarballOverlays groups tarball overlays by target archive and processes +// them in order. Multiple overlays targeting the same tarball are batched into +// a single extract/modify/repack cycle. All operations (extract, modify, repack) +// are performed in pure Go on the host, except for patch application which +// shells out to the host's `patch` command. 
+func applyTarballOverlays(
+	ctx context.Context,
+	cmdFactory opctx.CmdFactory,
+	fs opctx.FS,
+	eventListener opctx.EventListener,
+	sourcesDirPath string,
+	overlays []projectconfig.ComponentOverlay,
+) error {
+	groups := groupOverlaysByTarball(overlays)
+	if len(groups) == 0 {
+		return nil
+	}
+
+	// Count only the overlays that were actually grouped. The input slice may also
+	// contain non-tarball overlays, which groupOverlaysByTarball drops, so
+	// len(overlays) would overstate the number of operations being applied.
+	operations := 0
+	for i := range groups {
+		operations += len(groups[i].overlays)
+	}
+
+	event := eventListener.StartEvent("Applying tarball overlays",
+		"tarballs", len(groups),
+		"operations", operations,
+	)
+	defer event.End()
+
+	// Tarballs are processed in first-seen order; the first failure aborts the
+	// run and the returned error names the archive involved.
+	for _, group := range groups {
+		if err := processTarball(ctx, cmdFactory, fs, sourcesDirPath, group); err != nil {
+			return fmt.Errorf("tarball overlay failed for %#q:\n%w", group.tarball, err)
+		}
+	}
+
+	return nil
+}
+
+// tarballGroup holds the overlays that target one tarball, preserving their order.
+type tarballGroup struct {
+	// tarball is the archive filename, taken from [projectconfig.ComponentOverlay.Tarball].
+	tarball string
+	// overlays are the tarball overlays for that archive, in the order they were given.
+	overlays []projectconfig.ComponentOverlay
+}
+
+// groupOverlaysByTarball groups tarball overlays by their
+// [projectconfig.ComponentOverlay.Tarball] field, preserving insertion order
+// within each group and across groups (a group's position is that of its first
+// overlay). Non-tarball overlays are silently skipped. The result is nil when
+// the input contains no tarball overlays.
+func groupOverlaysByTarball(overlays []projectconfig.ComponentOverlay) []tarballGroup {
+	// orderMap records each tarball's index in groups, so overlays for an
+	// already-seen tarball are appended to the existing group.
+	orderMap := make(map[string]int)
+
+	var groups []tarballGroup
+
+	for _, overlay := range overlays {
+		if !overlay.ModifiesTarball() {
+			continue
+		}
+
+		idx, exists := orderMap[overlay.Tarball]
+		if !exists {
+			idx = len(groups)
+			orderMap[overlay.Tarball] = idx
+
+			groups = append(groups, tarballGroup{tarball: overlay.Tarball})
+		}
+
+		groups[idx].overlays = append(groups[idx].overlays, overlay)
+	}
+
+	return groups
+}
+
+// processTarball extracts a tarball to a temp directory, applies all overlays,
+// and deterministically repacks it in-place with the original compression.
+func processTarball(
+	ctx context.Context,
+	cmdFactory opctx.CmdFactory,
+	fs opctx.FS,
+	sourcesDirPath string,
+	group tarballGroup,
+) error {
+	archivePath := filepath.Join(sourcesDirPath, group.tarball)
+
+	// Detect the compression from the filename first. This check touches nothing on
+	// disk, so an unsupported archive name fails before a temp directory is created.
+	compression, err := tarball.DetectCompression(group.tarball)
+	if err != nil {
+		return fmt.Errorf("detecting compression for %#q:\n%w", group.tarball, err)
+	}
+
+	// Create a temporary directory for extraction; it is removed on every return path.
+	// NOTE(review): workDir lives on the real OS filesystem but is passed to the
+	// tarball helpers together with fs — presumably fs must be OS-backed here; confirm.
+	workDir, err := os.MkdirTemp("", "tarball-overlay-")
+	if err != nil {
+		return fmt.Errorf("creating temp directory:\n%w", err)
+	}
+
+	defer func() {
+		if removeErr := os.RemoveAll(workDir); removeErr != nil {
+			slog.Warn("Failed to clean up tarball work directory", "path", workDir, "error", removeErr)
+		}
+	}()
+
+	if err := tarball.Extract(fs, archivePath, workDir, compression); err != nil {
+		return fmt.Errorf("extracting tarball:\n%w", err)
+	}
+
+	// Determine the root of the extracted content. Most source tarballs have
+	// a single top-level directory (e.g., "pkg-1.0/"). Overlay patterns and
+	// patches are applied relative to this root.
+	extractRoot, err := tarball.ResolveExtractRoot(workDir)
+	if err != nil {
+		return fmt.Errorf("resolving extract root:\n%w", err)
+	}
+
+	// Apply each overlay operation in order.
+	for _, overlay := range group.overlays {
+		// Only the patch operation receives ctx, so check cancellation here to
+		// avoid running further edits after the caller has given up.
+		if err := ctx.Err(); err != nil {
+			return fmt.Errorf("applying %#q operation:\n%w", overlay.Type, err)
+		}
+
+		if err := applyTarballOperation(ctx, cmdFactory, fs, extractRoot, overlay); err != nil {
+			return fmt.Errorf("applying %#q operation:\n%w", overlay.Type, err)
+		}
+	}
+
+	// Deterministically repack the tarball in-place. Repacking starts from workDir
+	// (not extractRoot) so any top-level directory is kept in the archive.
+	if err := tarball.RepackDeterministic(fs, archivePath, workDir, compression); err != nil {
+		return fmt.Errorf("repacking tarball:\n%w", err)
+	}
+
+	slog.Info("Tarball overlay applied", "tarball", group.tarball)
+
+	return nil
+}
+
+// applyTarballOperation dispatches a single overlay to the appropriate handler.
+func applyTarballOperation(
+	ctx context.Context,
+	cmdFactory opctx.CmdFactory,
+	fs opctx.FS,
+	extractRoot string,
+	overlay projectconfig.ComponentOverlay,
+) error {
+	//nolint:exhaustive // Only tarball overlay types are valid here; the default catches the rest.
+	switch overlay.Type {
+	case projectconfig.ComponentOverlayTarballFileRemove:
+		return tarballFileRemove(extractRoot, overlay.Filename)
+
+	case projectconfig.ComponentOverlayTarballSearchReplace:
+		return tarballSearchReplace(extractRoot, overlay.Filename, overlay.Regex, overlay.Replacement)
+
+	case projectconfig.ComponentOverlayTarballPatch:
+		// The optional Value field carries the strip level; when empty it defaults to 1.
+		stripLevel := 1
+
+		if overlay.Value != "" {
+			parsed, err := strconv.Atoi(overlay.Value)
+			if err != nil {
+				return fmt.Errorf("invalid strip level %#q:\n%w", overlay.Value, err)
+			}
+
+			// Atoi accepts a leading sign. A negative level would be rendered as
+			// "-p-1" on the patch command line, so reject it here with a clear message.
+			if parsed < 0 {
+				return fmt.Errorf("invalid strip level %#q: must be non-negative", overlay.Value)
+			}
+
+			stripLevel = parsed
+		}
+
+		return tarballApplyPatch(ctx, cmdFactory, fs, extractRoot, overlay.Source, stripLevel)
+
+	default:
+		return fmt.Errorf("unsupported tarball overlay type %#q", overlay.Type)
+	}
+}
+
+// tarballFileRemove removes files matching a glob pattern from the extracted tree.
+// The pattern is matched against paths relative to extractRoot; directories are
+// never matched. It returns an error wrapping [ErrOverlayDidNotApply] when no file
+// matches, and stops at the first file that cannot be removed.
+func tarballFileRemove(extractRoot, pattern string) error {
+	matches, err := globFilesInDir(extractRoot, pattern)
+	if err != nil {
+		return err
+	}
+
+	// An overlay that removes nothing is treated as a configuration error.
+	if len(matches) == 0 {
+		return fmt.Errorf("no files match pattern %#q:\n%w", pattern, ErrOverlayDidNotApply)
+	}
+
+	for _, path := range matches {
+		if err := os.Remove(path); err != nil {
+			return fmt.Errorf("failed to remove %#q:\n%w", path, err)
+		}
+	}
+
+	return nil
+}
+
+// tarballSearchReplace applies regex search-and-replace to files matching a glob
+// pattern inside the extracted tree.
+func tarballSearchReplace(extractRoot, pattern, regex, replacement string) error {
+	matches, err := globFilesInDir(extractRoot, pattern)
+	if err != nil {
+		return err
+	}
+
+	if len(matches) == 0 {
+		return fmt.Errorf("no files match pattern %#q:\n%w", pattern, ErrOverlayDidNotApply)
+	}
+
+	compiled, err := regexp.Compile(regex)
+	if err != nil {
+		return fmt.Errorf("invalid regex %#q:\n%w", regex, err)
+	}
+
+	anyReplaced := false
+
+	for _, path := range matches {
+		info, statErr := os.Lstat(path)
+		if statErr != nil {
+			return fmt.Errorf("inspecting %#q:\n%w", path, statErr)
+		}
+
+		// Only rewrite regular files. os.ReadFile and os.WriteFile follow symlinks,
+		// so a link that came out of the archive could otherwise redirect the edit
+		// to a file outside the extracted tree (or fail the overlay if it dangles).
+		if !info.Mode().IsRegular() {
+			slog.Debug("Skipping non-regular file in tarball search-replace", "path", path)
+
+			continue
+		}
+
+		content, readErr := os.ReadFile(path)
+		if readErr != nil {
+			return fmt.Errorf("reading %#q:\n%w", path, readErr)
+		}
+
+		newContent := compiled.ReplaceAll(content, []byte(replacement))
+		if string(newContent) == string(content) {
+			// Nothing changed in this file; leave it untouched on disk.
+			continue
+		}
+
+		anyReplaced = true
+
+		// The file already exists, so os.WriteFile truncates it and keeps its current
+		// permissions; the mode argument would only apply if the file were created.
+		if writeErr := os.WriteFile(path, newContent, fileperms.PublicFile); writeErr != nil {
+			return fmt.Errorf("writing %#q:\n%w", path, writeErr)
+		}
+	}
+
+	// Matching files but changing none of them means the overlay had no effect.
+	if !anyReplaced {
+		return fmt.Errorf("regex %#q did not match any content in files matching %#q:\n%w",
+			regex, pattern, ErrOverlayDidNotApply)
+	}
+
+	return nil
+}
+
+// tarballApplyPatch applies a unified diff patch to the extracted tree by
+// shelling out to the host's `patch` command, run with extractRoot as its
+// working directory.
+//
+// patchSource is read through fs and staged in a temporary file on the real
+// filesystem, which is removed before returning. stripLevel is passed to patch
+// as -p<stripLevel>. When the command fails, its stderr output is included in
+// the returned error.
+func tarballApplyPatch(
+	ctx context.Context,
+	cmdFactory opctx.CmdFactory,
+	fs opctx.FS,
+	extractRoot, patchSource string,
+	stripLevel int,
+) error {
+	if !cmdFactory.CommandInSearchPath("patch") {
+		return errors.New("'patch' command not found in PATH; " +
+			"install the 'patch' package to use tarball-patch overlays")
+	}
+
+	// Read the patch file content via the abstract FS (supports both real and test FSs).
+	patchData, err := fileutils.ReadFile(fs, patchSource)
+	if err != nil {
+		return fmt.Errorf("reading patch file %#q:\n%w", patchSource, err)
+	}
+
+	// Write to a temp file on the real filesystem so the patch command can read it.
+	tmpPatch, err := os.CreateTemp("", "tarball-patch-*.patch")
+	if err != nil {
+		return fmt.Errorf("creating temp patch file:\n%w", err)
+	}
+
+	tmpPatchPath := tmpPatch.Name()
+
+	defer func() {
+		if removeErr := os.Remove(tmpPatchPath); removeErr != nil {
+			slog.Warn("Failed to clean up temporary patch file", "path", tmpPatchPath, "error", removeErr)
+		}
+	}()
+
+	// Always close the file, and treat a failed Close like a failed Write: either
+	// one can mean the staged patch is incomplete on disk.
+	_, writeErr := tmpPatch.Write(patchData)
+	closeErr := tmpPatch.Close()
+
+	if stageErr := errors.Join(writeErr, closeErr); stageErr != nil {
+		return fmt.Errorf("writing temp patch file:\n%w", stageErr)
+	}
+
+	var stderr strings.Builder
+
+	// --force: never prompt (e.g. for patches that look reversed); fail instead.
+	// --no-backup-if-mismatch: do not leave ".orig" backups behind when a hunk
+	// applies with an offset or fuzz, since they would be repacked into the tarball.
+	// NOTE(review): both are GNU patch options — confirm the supported hosts ship GNU patch.
+	rawCmd := exec.CommandContext(ctx, "patch",
+		fmt.Sprintf("-p%d", stripLevel),
+		"--force",
+		"--no-backup-if-mismatch",
+		"-i", tmpPatchPath,
+	)
+	rawCmd.Dir = extractRoot
+	// NOTE(review): GNU patch appears to report failed hunks on stdout, which is not
+	// captured here — confirm whether the cmdFactory wrapper surfaces stdout.
+	rawCmd.Stderr = &stderr
+
+	cmd, err := cmdFactory.Command(rawCmd)
+	if err != nil {
+		return fmt.Errorf("creating patch command:\n%w", err)
+	}
+
+	if runErr := cmd.Run(ctx); runErr != nil {
+		return fmt.Errorf("patch failed:\n%s\n%w", stderr.String(), runErr)
+	}
+
+	return nil
+}
+
+// globFilesInDir finds files under root matching a glob pattern.
+// Supports doublestar patterns (e.g., "**/*.md").
+//
+// The pattern is matched against each entry's path relative to root, using
+// forward slashes. Directories are never returned. The returned paths include
+// root as a prefix, and the result is nil when nothing matches.
+func globFilesInDir(root, pattern string) ([]string, error) {
+	var matches []string
+
+	err := filepath.WalkDir(root, func(path string, entry os.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+
+		if entry.IsDir() {
+			return nil
+		}
+
+		rel, relErr := filepath.Rel(root, path)
+		if relErr != nil {
+			return fmt.Errorf("computing relative path for %#q:\n%w", path, relErr)
+		}
+
+		// doublestar.Match always treats '/' as the separator, so normalize the
+		// OS-specific relative path before matching.
+		matched, matchErr := doublestar.Match(pattern, filepath.ToSlash(rel))
+		if matchErr != nil {
+			return fmt.Errorf("invalid glob pattern %#q:\n%w", pattern, matchErr)
+		}
+
+		if matched {
+			matches = append(matches, path)
+		}
+
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("walking directory for glob %#q:\n%w", pattern, err)
+	}
+
+	return matches, nil
+}
+
+// tarballNamesFromOverlays returns the unique tarball filenames targeted by
+// tarball overlays in the given overlay list. Used by [sourcePreparerImpl.updateSourcesFile]
+// to determine which 'sources' entries need rehashing after overlay application.
+func tarballNamesFromOverlays(overlays []projectconfig.ComponentOverlay) []string {
+	var (
+		names []string
+		// seen tracks tarball names already emitted so each appears once,
+		// at the position of its first overlay.
+		seen = make(map[string]struct{})
+	)
+
+	for idx := range overlays {
+		overlay := &overlays[idx]
+		if !overlay.ModifiesTarball() {
+			continue
+		}
+
+		if _, duplicate := seen[overlay.Tarball]; duplicate {
+			continue
+		}
+
+		seen[overlay.Tarball] = struct{}{}
+		names = append(names, overlay.Tarball)
+	}
+
+	return names
+}
diff --git a/internal/app/azldev/core/sources/tarballoverlays_internal_test.go b/internal/app/azldev/core/sources/tarballoverlays_internal_test.go
new file mode 100644
index 00000000..ac5a666e
--- /dev/null
+++ b/internal/app/azldev/core/sources/tarballoverlays_internal_test.go
@@ -0,0 +1,137 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package sources
+
+import (
+	"os"
+	"testing"
+
+	"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestGroupOverlaysByTarball(t *testing.T) {
+	t.Run("groups overlays by tarball name preserving order", func(t *testing.T) {
+		overlays := []projectconfig.ComponentOverlay{
+			{
+				Type:     projectconfig.ComponentOverlayTarballFileRemove,
+				Tarball:  "pkg-1.0.tar.gz",
+				Filename: "unwanted.conf",
+			},
+			{
+				Type:        projectconfig.ComponentOverlayTarballSearchReplace,
+				Tarball:     "pkg-1.0.tar.gz",
+				Filename:    "config.h",
+				Regex:       "old",
+				Replacement: "new",
+			},
+			{
+				Type:     projectconfig.ComponentOverlayTarballFileRemove,
+				Tarball:  "other-2.0.tar.xz",
+				Filename: "docs/*.md",
+			},
+		}
+
+		groups := groupOverlaysByTarball(overlays)
+
+		require.Len(t, groups, 2)
+
+		assert.Equal(t, "pkg-1.0.tar.gz", groups[0].tarball)
+		require.Len(t, groups[0].overlays, 2)
+		assert.Equal(t, projectconfig.ComponentOverlayTarballFileRemove, groups[0].overlays[0].Type)
+		assert.Equal(t, projectconfig.ComponentOverlayTarballSearchReplace, groups[0].overlays[1].Type)
+
+		assert.Equal(t, "other-2.0.tar.xz",
groups[1].tarball) + require.Len(t, groups[1].overlays, 1) + }) + + t.Run("skips non-tarball overlays", func(t *testing.T) { + overlays := []projectconfig.ComponentOverlay{ + {Type: projectconfig.ComponentOverlaySetSpecTag, Tag: "Version", Value: "1.0"}, + {Type: projectconfig.ComponentOverlayTarballFileRemove, Tarball: "pkg.tar.gz", Filename: "f"}, + {Type: projectconfig.ComponentOverlayAddFile, Filename: "new.txt", Source: "src"}, + } + + groups := groupOverlaysByTarball(overlays) + + require.Len(t, groups, 1) + assert.Equal(t, "pkg.tar.gz", groups[0].tarball) + require.Len(t, groups[0].overlays, 1) + }) +} + +func TestGlobFilesInDir(t *testing.T) { + workDir := t.TempDir() + + require.NoError(t, os.MkdirAll(workDir+"/sub", 0o755)) + require.NoError(t, os.WriteFile(workDir+"/file.txt", nil, fileperms.PrivateFile)) + require.NoError(t, os.WriteFile(workDir+"/sub/deep.txt", nil, fileperms.PrivateFile)) + require.NoError(t, os.WriteFile(workDir+"/sub/other.md", nil, fileperms.PrivateFile)) + + t.Run("simple glob", func(t *testing.T) { + matches, err := globFilesInDir(workDir, "*.txt") + require.NoError(t, err) + require.Len(t, matches, 1) + }) + + t.Run("doublestar glob", func(t *testing.T) { + matches, err := globFilesInDir(workDir, "**/*.txt") + require.NoError(t, err) + require.Len(t, matches, 2) + }) + + t.Run("no matches", func(t *testing.T) { + matches, err := globFilesInDir(workDir, "*.rs") + require.NoError(t, err) + assert.Empty(t, matches) + }) +} + +func TestTarballFileRemove(t *testing.T) { + workDir := t.TempDir() + + require.NoError(t, os.WriteFile(workDir+"/keep.txt", []byte("keep"), fileperms.PrivateFile)) + require.NoError(t, os.WriteFile(workDir+"/remove.conf", []byte("remove"), fileperms.PrivateFile)) + + err := tarballFileRemove(workDir, "*.conf") + require.NoError(t, err) + + assert.FileExists(t, workDir+"/keep.txt") + assert.NoFileExists(t, workDir+"/remove.conf") +} + +func TestTarballFileRemoveNoMatch(t *testing.T) { + workDir := t.TempDir() 
+ + require.NoError(t, os.WriteFile(workDir+"/file.txt", nil, fileperms.PrivateFile)) + + err := tarballFileRemove(workDir, "*.conf") + require.Error(t, err) + assert.ErrorIs(t, err, ErrOverlayDidNotApply) +} + +func TestTarballSearchReplace(t *testing.T) { + workDir := t.TempDir() + + require.NoError(t, os.WriteFile(workDir+"/config.h", []byte("#define OLD_VALUE 1\n"), fileperms.PrivateFile)) + + err := tarballSearchReplace(workDir, "config.h", "OLD_VALUE", "NEW_VALUE") + require.NoError(t, err) + + content, readErr := os.ReadFile(workDir + "/config.h") + require.NoError(t, readErr) + assert.Equal(t, "#define NEW_VALUE 1\n", string(content)) +} + +func TestTarballSearchReplaceNoMatch(t *testing.T) { + workDir := t.TempDir() + + require.NoError(t, os.WriteFile(workDir+"/config.h", []byte("#define SOMETHING 1\n"), fileperms.PrivateFile)) + + err := tarballSearchReplace(workDir, "config.h", "NONEXISTENT", "new") + require.Error(t, err) + assert.ErrorIs(t, err, ErrOverlayDidNotApply) +} diff --git a/internal/projectconfig/overlay.go b/internal/projectconfig/overlay.go index 7ec0b50e..1fa8fcc2 100644 --- a/internal/projectconfig/overlay.go +++ b/internal/projectconfig/overlay.go @@ -17,10 +17,13 @@ import ( // ComponentOverlay represents an overlay that may be applied to a component's spec and/or its sources. type ComponentOverlay struct { // The type of overlay to apply. 
- Type ComponentOverlayType `toml:"type" json:"type" validate:"required" jsonschema:"enum=spec-add-tag,enum=spec-insert-tag,enum=spec-set-tag,enum=spec-update-tag,enum=spec-remove-tag,enum=spec-prepend-lines,enum=spec-append-lines,enum=spec-search-replace,enum=spec-remove-section,enum=spec-remove-subpackage,enum=patch-add,enum=patch-remove,enum=file-prepend-lines,enum=file-search-replace,enum=file-add,enum=file-remove,enum=file-rename,title=Overlay type,description=The type of overlay to apply"` + Type ComponentOverlayType `toml:"type" json:"type" validate:"required" jsonschema:"enum=spec-add-tag,enum=spec-insert-tag,enum=spec-set-tag,enum=spec-update-tag,enum=spec-remove-tag,enum=spec-prepend-lines,enum=spec-append-lines,enum=spec-search-replace,enum=spec-remove-section,enum=spec-remove-subpackage,enum=patch-add,enum=patch-remove,enum=file-prepend-lines,enum=file-search-replace,enum=file-add,enum=file-remove,enum=file-rename,enum=tarball-file-remove,enum=tarball-search-replace,enum=tarball-patch,title=Overlay type,description=The type of overlay to apply"` // Human readable description of overlay; primarily present to document the need for the change. Description string `toml:"description,omitempty" json:"description,omitempty" jsonschema:"title=Description,description=Human readable description of overlay" fingerprint:"-"` + // For overlays that target files inside a source tarball, identifies the tarball to modify. + // Must be a filename (not a path) matching a source archive in the component's sources directory. + Tarball string `toml:"tarball,omitempty" json:"tarball,omitempty" jsonschema:"title=Tarball,description=The source tarball to modify (e.g. pkg-1.0.tar.gz)"` // For overlays that apply to non-spec files, indicates the filename. For overlays that can // apply to multiple files, supports glob patterns (including globstar). 
Filename string `toml:"file,omitempty" json:"file,omitempty" jsonschema:"title=Filename,description=The name of the non-spec file to which this overlay applies, or a glob pattern matching multiple files"` @@ -119,6 +122,14 @@ func (c *ComponentOverlay) ModifiesSpec() bool { c.Type == ComponentOverlayRemovePatch } +// ModifiesTarball returns true if the overlay modifies files inside a source tarball, +// i.e. its type is one of the tarball-* overlay types. Such overlays are applied by +// extracting the tarball, editing the extracted tree, and repacking it. +func (c *ComponentOverlay) ModifiesTarball() bool { + return c.Type == ComponentOverlayTarballFileRemove || + c.Type == ComponentOverlayTarballSearchReplace || + c.Type == ComponentOverlayTarballPatch +} + // ModifiesNonSpecFiles returns true if the overlay modifies non-spec files. This includes // hybrid overlays that modify both spec and source files (e.g., patch overlays), since // those also require non-spec modifications. @@ -182,12 +193,21 @@ const ( ComponentOverlayRemoveFile ComponentOverlayType = "file-remove" // ComponentOverlayRenameFile is an overlay that renames a non-spec file. ComponentOverlayRenameFile ComponentOverlayType = "file-rename" + // ComponentOverlayTarballFileRemove is an overlay that removes file(s) from inside a source tarball. + // The tarball is extracted, matching files are deleted, and the tarball is repacked. + ComponentOverlayTarballFileRemove ComponentOverlayType = "tarball-file-remove" + // ComponentOverlayTarballSearchReplace is an overlay that performs regex search-and-replace + // on file(s) inside a source tarball. + ComponentOverlayTarballSearchReplace ComponentOverlayType = "tarball-search-replace" + // ComponentOverlayTarballPatch is an overlay that applies a unified diff patch to the + // extracted contents of a source tarball. + ComponentOverlayTarballPatch ComponentOverlayType = "tarball-patch" ) // Validate checks that required fields are set based on the overlay type. This catches // configuration errors at load time rather than at apply time. 
// -//nolint:cyclop,gocognit,gocyclo,funlen // complexity is inherent to the number of overlay types. +//nolint:cyclop,gocognit,gocyclo,funlen,maintidx // complexity is inherent to the number of overlay types. func (c *ComponentOverlay) Validate() error { desc := c.Description if desc == "" { @@ -329,6 +349,46 @@ func (c *ComponentOverlay) Validate() error { if err := validateGlobPattern(c.Filename, desc); err != nil { return err } + case ComponentOverlayTarballFileRemove: + if err := requireFileBasename("tarball", c.Tarball); err != nil { + return err + } + + if err := requireRelativePath("file", c.Filename); err != nil { + return err + } + + if err := validateGlobPattern(c.Filename, desc); err != nil { + return err + } + case ComponentOverlayTarballSearchReplace: + if err := requireFileBasename("tarball", c.Tarball); err != nil { + return err + } + + if err := requireRelativePath("file", c.Filename); err != nil { + return err + } + + if err := validateGlobPattern(c.Filename, desc); err != nil { + return err + } + + if c.Regex == "" { + return missingField("regex") + } + + if err := validateRegex(c.Regex, desc); err != nil { + return err + } + case ComponentOverlayTarballPatch: + if err := requireFileBasename("tarball", c.Tarball); err != nil { + return err + } + + if c.Source == "" { + return missingField("source") + } default: return fmt.Errorf("unknown overlay type %#q: %#q", c.Type, desc) } diff --git a/internal/projectconfig/overlay_test.go b/internal/projectconfig/overlay_test.go index 84ccf200..7b75f0e2 100644 --- a/internal/projectconfig/overlay_test.go +++ b/internal/projectconfig/overlay_test.go @@ -412,6 +412,147 @@ func TestComponentOverlay_Validate(t *testing.T) { errorExpected: true, errorContains: "section", }, + // tarball-file-remove tests + { + name: "tarball-file-remove valid", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballFileRemove, + Tarball: "pkg-1.0.tar.gz", + Filename: "unwanted.conf", + }, + 
errorExpected: false, + }, + { + name: "tarball-file-remove valid with glob", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballFileRemove, + Tarball: "pkg-1.0.tar.gz", + Filename: "docs/**/*.md", + }, + errorExpected: false, + }, + { + name: "tarball-file-remove missing tarball", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballFileRemove, + Filename: "unwanted.conf", + }, + errorExpected: true, + errorContains: "tarball", + }, + { + name: "tarball-file-remove missing file", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballFileRemove, + Tarball: "pkg-1.0.tar.gz", + }, + errorExpected: true, + errorContains: "file", + }, + { + name: "tarball-file-remove rejects tarball path", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballFileRemove, + Tarball: "subdir/pkg-1.0.tar.gz", + Filename: "unwanted.conf", + }, + errorExpected: true, + errorContains: "tarball", + }, + // tarball-search-replace tests + { + name: "tarball-search-replace valid", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballSearchReplace, + Tarball: "pkg-1.0.tar.gz", + Filename: "config.h", + Regex: "old_value", + Replacement: "new_value", + }, + errorExpected: false, + }, + { + name: "tarball-search-replace missing tarball", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballSearchReplace, + Filename: "config.h", + Regex: "old_value", + Replacement: "new_value", + }, + errorExpected: true, + errorContains: "tarball", + }, + { + name: "tarball-search-replace missing file", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballSearchReplace, + Tarball: "pkg-1.0.tar.gz", + Regex: "old_value", + Replacement: "new_value", + }, + errorExpected: true, + errorContains: "file", + }, + { + name: "tarball-search-replace missing regex", + overlay: 
projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballSearchReplace, + Tarball: "pkg-1.0.tar.gz", + Filename: "config.h", + }, + errorExpected: true, + errorContains: "regex", + }, + { + name: "tarball-search-replace invalid regex", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballSearchReplace, + Tarball: "pkg-1.0.tar.gz", + Filename: "config.h", + Regex: "[invalid", + Replacement: "new_value", + }, + errorExpected: true, + errorContains: "regex", + }, + // tarball-patch tests + { + name: "tarball-patch valid", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballPatch, + Tarball: "pkg-1.0.tar.gz", + Source: "patches/fix.patch", + }, + errorExpected: false, + }, + { + name: "tarball-patch valid with strip level", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballPatch, + Tarball: "pkg-1.0.tar.gz", + Source: "patches/fix.patch", + Value: "2", + }, + errorExpected: false, + }, + { + name: "tarball-patch missing tarball", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballPatch, + Source: "patches/fix.patch", + }, + errorExpected: true, + errorContains: "tarball", + }, + { + name: "tarball-patch missing source", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayTarballPatch, + Tarball: "pkg-1.0.tar.gz", + }, + errorExpected: true, + errorContains: "source", + }, } for _, testCase := range testCases { @@ -455,6 +596,12 @@ func TestComponentOverlay_ModifiesSpec(t *testing.T) { projectconfig.ComponentOverlayAddFile, } + tarballOverlayTypes := []projectconfig.ComponentOverlayType{ + projectconfig.ComponentOverlayTarballFileRemove, + projectconfig.ComponentOverlayTarballSearchReplace, + projectconfig.ComponentOverlayTarballPatch, + } + for _, overlayType := range specOverlayTypes { t.Run(string(overlayType)+"_is_spec_overlay", func(t *testing.T) { overlay := 
projectconfig.ComponentOverlay{Type: overlayType} @@ -468,4 +615,13 @@ func TestComponentOverlay_ModifiesSpec(t *testing.T) { assert.False(t, overlay.ModifiesSpec(), "expected %s to not be a spec overlay", overlayType) }) } + + for _, overlayType := range tarballOverlayTypes { + t.Run(string(overlayType)+"_is_tarball_overlay", func(t *testing.T) { + overlay := projectconfig.ComponentOverlay{Type: overlayType} + assert.True(t, overlay.ModifiesTarball(), "expected %s to be a tarball overlay", overlayType) + assert.False(t, overlay.ModifiesSpec(), "expected %s to not be a spec overlay", overlayType) + assert.False(t, overlay.ModifiesNonSpecFiles(), "expected %s to not be a non-spec overlay", overlayType) + }) + } }