diff --git a/cmd/api/api/images_test.go b/cmd/api/api/images_test.go index 86d1ff9e..6613f7f2 100644 --- a/cmd/api/api/images_test.go +++ b/cmd/api/api/images_test.go @@ -225,12 +225,14 @@ func TestCreateImage_Idempotent(t *testing.T) { t.Fatal("Build failed - this is the root cause of test failures") } - // Status can be "pending" (still processing) or "ready" (already completed in fast CI) + // Status can be any in-progress state or ready (already completed in fast CI) // The key idempotency invariant is that the digest is the same (verified above) require.Contains(t, []oapi.ImageStatus{ oapi.ImageStatus(images.StatusPending), + oapi.ImageStatus(images.StatusPulling), + oapi.ImageStatus(images.StatusConverting), oapi.ImageStatus(images.StatusReady), - }, img2.Status, "status should be pending or ready") + }, img2.Status, "status should be pending, pulling, converting, or ready") // If still pending, should have queue position if img2.Status == oapi.ImageStatus(images.StatusPending) { diff --git a/lib/builds/builder_agent/main.go b/lib/builds/builder_agent/main.go index a74b2939..799a39eb 100644 --- a/lib/builds/builder_agent/main.go +++ b/lib/builds/builder_agent/main.go @@ -1180,7 +1180,17 @@ func createErofsFromRegistry(config *BuildConfig, digest string) string { log.Printf("Image has %d layers, extracting...", len(layers)) - // Download and extract each layer + // Download and extract each layer with proper OCI whiteout handling. + // + // OCI whiteout semantics require careful ordering: + // - Opaque whiteouts (.wh..wh..opq): "replace this directory entirely with + // this layer's contents." Must clear the directory BEFORE extracting the + // layer, otherwise the layer's own files get deleted. + // - Regular whiteouts (.wh.foo): "delete foo from lower layers." Safe to + // process AFTER extracting the layer. + // + // Approach: download blob to temp file, pre-scan tar for opaque whiteouts, + // clear those directories, extract the full tar, then handle regular whiteouts. for i, layer := range layers { blobURL := fmt.Sprintf("%s/v2/%s/blobs/%s", baseURL, repo, layer.Digest) blobReq, err := http.NewRequest("GET", blobURL, nil) @@ -1203,56 +1213,88 @@ func createErofsFromRegistry(config *BuildConfig, digest string) string { return "" } - // Determine decompression based on media type - tarFlags := "-xf" - if strings.Contains(layer.MediaType, "gzip") { - tarFlags = "-xzf" + // Save blob to temp file so we can scan it for whiteouts before extracting + blobFile, err := os.CreateTemp("", "layer-*.blob") + if err != nil { + blobResp.Body.Close() + log.Printf("Warning: erofs creation failed (create temp for layer %d): %v", i, err) + return "" } - // For zstd, use zstd pipe + if _, err := io.Copy(blobFile, blobResp.Body); err != nil { + blobResp.Body.Close() + blobFile.Close() + os.Remove(blobFile.Name()) + log.Printf("Warning: erofs creation failed (save layer %d): %v", i, err) + return "" + } + blobResp.Body.Close() + blobFile.Close() + blobPath := blobFile.Name() + + // Build the decompression + tar pipeline command based on media type + var decompressListCmd, decompressExtractCmd string if strings.Contains(layer.MediaType, "zstd") { - // Use zstd decompression via pipe - tarCmd := exec.Command("sh", "-c", fmt.Sprintf("zstd -d | tar -xf - -C %s", exportDir)) - tarCmd.Stdin = blobResp.Body - if out, err := tarCmd.CombinedOutput(); err != nil { - blobResp.Body.Close() - log.Printf("Warning: erofs creation failed (extract zstd layer %d): %v: %s", i, err, out) - return "" - } + decompressListCmd = fmt.Sprintf("zstd -d < %s | tar -tf -", blobPath) + decompressExtractCmd = fmt.Sprintf("zstd -d < %s | tar -xf - -C %s", blobPath, exportDir) + } else if strings.Contains(layer.MediaType, "gzip") { + decompressListCmd = fmt.Sprintf("tar -tzf %s", blobPath) + decompressExtractCmd = fmt.Sprintf("tar -xzf %s -C %s", blobPath, exportDir) } else { - tarCmd := exec.Command("tar", tarFlags, "-", "-C", exportDir) - tarCmd.Stdin = blobResp.Body - if out, err := tarCmd.CombinedOutput(); err != nil { - blobResp.Body.Close() - log.Printf("Warning: erofs creation failed (extract layer %d): %v: %s", i, err, out) - return "" + decompressListCmd = fmt.Sprintf("tar -tf %s", blobPath) + decompressExtractCmd = fmt.Sprintf("tar -xf %s -C %s", blobPath, exportDir) + } + + // Pre-scan: list tar entries to find opaque whiteouts (.wh..wh..opq). + // These must be processed BEFORE extraction so the layer's own files + // in that directory are preserved. + listCmd := exec.Command("sh", "-c", decompressListCmd) + listOut, err := listCmd.Output() + if err != nil { + os.Remove(blobPath) + log.Printf("Warning: erofs creation failed (list layer %d): %v", i, err) + return "" + } + for _, entry := range strings.Split(string(listOut), "\n") { + entry = strings.TrimPrefix(entry, "./") + if strings.HasSuffix(entry, "/.wh..wh..opq") || entry == ".wh..wh..opq" { + // Opaque whiteout: clear the target directory before extraction + opaqueDir := filepath.Join(exportDir, filepath.Dir(entry)) + if info, err := os.Stat(opaqueDir); err == nil && info.IsDir() { + entries, _ := os.ReadDir(opaqueDir) + for _, e := range entries { + os.RemoveAll(filepath.Join(opaqueDir, e.Name())) + } + } } } - blobResp.Body.Close() + + // Extract the full layer + extractCmd := exec.Command("sh", "-c", decompressExtractCmd) + if out, err := extractCmd.CombinedOutput(); err != nil { + os.Remove(blobPath) + log.Printf("Warning: erofs creation failed (extract layer %d): %v: %s", i, err, out) + return "" + } + os.Remove(blobPath) log.Printf(" Layer %d/%d extracted (%d bytes)", i+1, len(layers), layer.Size) - // Process OCI whiteout files for THIS layer before extracting the next. - // Whiteouts must be applied per-layer: a whiteout in layer N deletes files - // from layers 0..N-1, but must not affect files added by layers N+1..last. + // Post-extract: process regular whiteouts and clean up opaque whiteout markers. + // Regular whiteouts (.wh.foo) delete a specific file from lower layers. + // Since we extract the full layer first, the whiteout marker and its target + // may both exist — remove both. filepath.Walk(exportDir, func(path string, info os.FileInfo, err error) error { if err != nil { return nil } name := info.Name() - if strings.HasPrefix(name, ".wh.") { - if name == ".wh..wh..opq" { - // Opaque whiteout: remove all siblings - dir := filepath.Dir(path) - entries, _ := os.ReadDir(dir) - for _, e := range entries { - if e.Name() != ".wh..wh..opq" { - os.RemoveAll(filepath.Join(dir, e.Name())) - } - } - } else { - // Regular whiteout: remove the target file - target := filepath.Join(filepath.Dir(path), strings.TrimPrefix(name, ".wh.")) - os.RemoveAll(target) - } + if name == ".wh..wh..opq" { + // Opaque whiteout marker: just remove the marker file. + // The directory was already cleared before extraction. + os.Remove(path) + } else if strings.HasPrefix(name, ".wh.") { + // Regular whiteout: remove the target file from lower layers + target := filepath.Join(filepath.Dir(path), strings.TrimPrefix(name, ".wh.")) + os.RemoveAll(target) os.Remove(path) } return nil @@ -1268,10 +1310,29 @@ func createErofsFromRegistry(config *BuildConfig, digest string) string { return "" } - // Sync to ensure the erofs file is flushed to the block device before - // the host tries to mount and read the source volume. - syncCmd := exec.Command("sync") - syncCmd.Run() + // Ensure the erofs file is fully flushed to the block device before the + // host reads the source volume. sync(2) alone is insufficient — it only + // queues writes. We need fsync on the file AND the directory entry, then + // a global sync to push through the virtio-blk layer. + if f, err := os.Open(erofsDst); err == nil { + f.Sync() // fsync the file data + metadata + f.Close() + } + if d, err := os.Open(config.SourcePath); err == nil { + d.Sync() // fsync the directory entry + d.Close() + } + // Unmount the source volume filesystem to force all buffered writes + // through to the block device. Re-mount is not needed since we're done. + // Change cwd away from the volume first so umount can succeed. + os.Chdir("/") + umountCmd := exec.Command("umount", config.SourcePath) + if out, err := umountCmd.CombinedOutput(); err != nil { + log.Printf("Warning: umount source failed (sync fallback): %v: %s", err, out) + // Fall back to sync if umount fails + exec.Command("sync").Run() + time.Sleep(500 * time.Millisecond) + } elapsed := time.Since(start) log.Printf("erofs disk created at %s in %v", erofsDst, elapsed)