Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cmd/api/api/images_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,14 @@ func TestCreateImage_Idempotent(t *testing.T) {
t.Fatal("Build failed - this is the root cause of test failures")
}

// Status can be "pending" (still processing) or "ready" (already completed in fast CI)
// Status can be any in-progress state or ready (already completed in fast CI)
// The key idempotency invariant is that the digest is the same (verified above)
require.Contains(t, []oapi.ImageStatus{
oapi.ImageStatus(images.StatusPending),
oapi.ImageStatus(images.StatusPulling),
oapi.ImageStatus(images.StatusConverting),
oapi.ImageStatus(images.StatusReady),
}, img2.Status, "status should be pending or ready")
}, img2.Status, "status should be pending, pulling, converting, or ready")

// If still pending, should have queue position
if img2.Status == oapi.ImageStatus(images.StatusPending) {
Expand Down
147 changes: 104 additions & 43 deletions lib/builds/builder_agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,17 @@ func createErofsFromRegistry(config *BuildConfig, digest string) string {

log.Printf("Image has %d layers, extracting...", len(layers))

// Download and extract each layer
// Download and extract each layer with proper OCI whiteout handling.
//
// OCI whiteout semantics require careful ordering:
// - Opaque whiteouts (.wh..wh..opq): "replace this directory entirely with
// this layer's contents." Must clear the directory BEFORE extracting the
// layer, otherwise the layer's own files get deleted.
// - Regular whiteouts (.wh.foo): "delete foo from lower layers." Safe to
// process AFTER extracting the layer.
//
// Approach: download blob to temp file, pre-scan tar for opaque whiteouts,
// clear those directories, extract the full tar, then handle regular whiteouts.
for i, layer := range layers {
blobURL := fmt.Sprintf("%s/v2/%s/blobs/%s", baseURL, repo, layer.Digest)
blobReq, err := http.NewRequest("GET", blobURL, nil)
Expand All @@ -1203,56 +1213,88 @@ func createErofsFromRegistry(config *BuildConfig, digest string) string {
return ""
}

// Determine decompression based on media type
tarFlags := "-xf"
if strings.Contains(layer.MediaType, "gzip") {
tarFlags = "-xzf"
// Save blob to temp file so we can scan it for whiteouts before extracting
blobFile, err := os.CreateTemp("", "layer-*.blob")
if err != nil {
blobResp.Body.Close()
log.Printf("Warning: erofs creation failed (create temp for layer %d): %v", i, err)
return ""
}
// For zstd, use zstd pipe
if _, err := io.Copy(blobFile, blobResp.Body); err != nil {
blobResp.Body.Close()
blobFile.Close()
os.Remove(blobFile.Name())
log.Printf("Warning: erofs creation failed (save layer %d): %v", i, err)
return ""
}
blobResp.Body.Close()
blobFile.Close()
blobPath := blobFile.Name()

// Build the decompression + tar pipeline command based on media type
var decompressListCmd, decompressExtractCmd string
if strings.Contains(layer.MediaType, "zstd") {
// Use zstd decompression via pipe
tarCmd := exec.Command("sh", "-c", fmt.Sprintf("zstd -d | tar -xf - -C %s", exportDir))
tarCmd.Stdin = blobResp.Body
if out, err := tarCmd.CombinedOutput(); err != nil {
blobResp.Body.Close()
log.Printf("Warning: erofs creation failed (extract zstd layer %d): %v: %s", i, err, out)
return ""
}
decompressListCmd = fmt.Sprintf("zstd -d < %s | tar -tf -", blobPath)
decompressExtractCmd = fmt.Sprintf("zstd -d < %s | tar -xf - -C %s", blobPath, exportDir)
} else if strings.Contains(layer.MediaType, "gzip") {
decompressListCmd = fmt.Sprintf("tar -tzf %s", blobPath)
decompressExtractCmd = fmt.Sprintf("tar -xzf %s -C %s", blobPath, exportDir)
} else {
tarCmd := exec.Command("tar", tarFlags, "-", "-C", exportDir)
tarCmd.Stdin = blobResp.Body
if out, err := tarCmd.CombinedOutput(); err != nil {
blobResp.Body.Close()
log.Printf("Warning: erofs creation failed (extract layer %d): %v: %s", i, err, out)
return ""
decompressListCmd = fmt.Sprintf("tar -tf %s", blobPath)
decompressExtractCmd = fmt.Sprintf("tar -xf %s -C %s", blobPath, exportDir)
}

// Pre-scan: list tar entries to find opaque whiteouts (.wh..wh..opq).
// These must be processed BEFORE extraction so the layer's own files
// in that directory are preserved.
listCmd := exec.Command("sh", "-c", decompressListCmd)
listOut, err := listCmd.Output()
if err != nil {
os.Remove(blobPath)
log.Printf("Warning: erofs creation failed (list layer %d): %v", i, err)
return ""
}
for _, entry := range strings.Split(string(listOut), "\n") {
entry = strings.TrimPrefix(entry, "./")
if strings.HasSuffix(entry, "/.wh..wh..opq") || entry == ".wh..wh..opq" {
// Opaque whiteout: clear the target directory before extraction
opaqueDir := filepath.Join(exportDir, filepath.Dir(entry))
if info, err := os.Stat(opaqueDir); err == nil && info.IsDir() {
entries, _ := os.ReadDir(opaqueDir)
for _, e := range entries {
os.RemoveAll(filepath.Join(opaqueDir, e.Name()))
}
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Path traversal in pre-scan enables directory clearing outside exportDir

Low Severity

The pre-scan processes raw tar -tf output entries without sanitizing path traversal sequences. filepath.Join(exportDir, filepath.Dir(entry)) with an entry containing ../ resolves outside exportDir — e.g. a crafted entry like ../../../etc/.wh..wh..opq resolves opaqueDir to /etc, causing os.RemoveAll on every entry in that directory. Notably, tar -tf shows raw archive paths (including ../) while tar -xf strips them by default, creating an asymmetry where the pre-scan clears directories that extraction would never actually touch.

Fix in Cursor Fix in Web

}
}
blobResp.Body.Close()

// Extract the full layer
extractCmd := exec.Command("sh", "-c", decompressExtractCmd)
if out, err := extractCmd.CombinedOutput(); err != nil {
os.Remove(blobPath)
log.Printf("Warning: erofs creation failed (extract layer %d): %v: %s", i, err, out)
return ""
}
os.Remove(blobPath)
log.Printf(" Layer %d/%d extracted (%d bytes)", i+1, len(layers), layer.Size)

// Process OCI whiteout files for THIS layer before extracting the next.
// Whiteouts must be applied per-layer: a whiteout in layer N deletes files
// from layers 0..N-1, but must not affect files added by layers N+1..last.
// Post-extract: process regular whiteouts and clean up opaque whiteout markers.
// Regular whiteouts (.wh.foo) delete a specific file from lower layers.
// Since we extract the full layer first, the whiteout marker and its target
// may both exist — remove both.
filepath.Walk(exportDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil
}
name := info.Name()
if strings.HasPrefix(name, ".wh.") {
if name == ".wh..wh..opq" {
// Opaque whiteout: remove all siblings
dir := filepath.Dir(path)
entries, _ := os.ReadDir(dir)
for _, e := range entries {
if e.Name() != ".wh..wh..opq" {
os.RemoveAll(filepath.Join(dir, e.Name()))
}
}
} else {
// Regular whiteout: remove the target file
target := filepath.Join(filepath.Dir(path), strings.TrimPrefix(name, ".wh."))
os.RemoveAll(target)
}
if name == ".wh..wh..opq" {
// Opaque whiteout marker: just remove the marker file.
// The directory was already cleared before extraction.
os.Remove(path)
} else if strings.HasPrefix(name, ".wh.") {
// Regular whiteout: remove the target file from lower layers
target := filepath.Join(filepath.Dir(path), strings.TrimPrefix(name, ".wh."))
os.RemoveAll(target)
os.Remove(path)
}
return nil
Expand All @@ -1268,10 +1310,29 @@ func createErofsFromRegistry(config *BuildConfig, digest string) string {
return ""
}

// Sync to ensure the erofs file is flushed to the block device before
// the host tries to mount and read the source volume.
syncCmd := exec.Command("sync")
syncCmd.Run()
// Ensure the erofs file is fully flushed to the block device before the
// host reads the source volume. sync(2) alone is insufficient — it only
// queues writes. We need fsync on the file AND the directory entry, then
// a global sync to push through the virtio-blk layer.
if f, err := os.Open(erofsDst); err == nil {
f.Sync() // fsync the file data + metadata
f.Close()
}
if d, err := os.Open(config.SourcePath); err == nil {
d.Sync() // fsync the directory entry
d.Close()
}
// Unmount the source volume filesystem to force all buffered writes
// through to the block device. Re-mount is not needed since we're done.
// Change cwd away from the volume first so umount can succeed.
os.Chdir("/")
umountCmd := exec.Command("umount", config.SourcePath)
if out, err := umountCmd.CombinedOutput(); err != nil {
log.Printf("Warning: umount source failed (sync fallback): %v: %s", err, out)
// Fall back to sync if umount fails
exec.Command("sync").Run()
time.Sleep(500 * time.Millisecond)
}

elapsed := time.Since(start)
log.Printf("erofs disk created at %s in %v", erofsDst, elapsed)
Expand Down