Skip to content

Commit 7e4908e

Browse files
committed
fix: address P1/P2 review findings from PR #16
P1 GC: Implement RegisterGC for FC backend — protects blob IDs referenced by FC VMs from garbage collection, mirroring CH's GC module. P1 Clone paths: Save cocoon.json metadata (StorageConfigs + BootConfig) in snapshot tar. Create temporary symlinks from source drive paths to clone paths before snapshot/load so FC finds drives at expected locations. Symlinks are cleaned up after load + reconfigure. P2 Rebuild: Replace fragile rebuildFromSnapshot (searched live VM records) with self-contained metadata from cocoon.json. Clones no longer depend on the source VM or any sibling VM existing in the DB. P2 Console relay: Add 3s timeout on second goroutine wait after client disconnect to prevent blocking the accept loop when PTY read is stuck.
1 parent bcb1e6d commit 7e4908e

5 files changed

Lines changed: 216 additions & 80 deletions

File tree

hypervisor/firecracker/clone.go

Lines changed: 61 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -65,36 +65,34 @@ func (fc *Firecracker) cloneSetup(ctx context.Context, vmID string, vmCfg *types
6565
func (fc *Firecracker) cloneAfterExtract(ctx context.Context, vmID string, vmCfg *types.VMConfig, networkConfigs []*types.NetworkConfig, runDir, logDir string, now time.Time) (*types.VM, error) {
6666
logger := log.WithFunc("firecracker.Clone")
6767

68-
// Rebuild storage/boot configs from the snapshot's cow.raw and existing record context.
69-
// FC stores StorageConfigs on the VMRecord (not in a config.json like CH).
70-
// We need to find the COW file in the snapshot and update paths.
68+
// Read snapshot metadata (cocoon.json) to reconstruct storage/boot config.
69+
// This makes the clone self-contained — no dependency on live VM records.
70+
meta, err := loadSnapshotMeta(runDir)
71+
if err != nil {
72+
return nil, fmt.Errorf("load snapshot metadata: %w", err)
73+
}
74+
7175
cowPath := fc.conf.COWRawPath(vmID)
7276
snapshotCOW := filepath.Join(runDir, cowFileName)
73-
74-
// Move the extracted COW to its canonical location.
75-
if err := os.Rename(snapshotCOW, cowPath); err != nil {
76-
return nil, fmt.Errorf("move COW to canonical path: %w", err)
77+
if renameErr := os.Rename(snapshotCOW, cowPath); renameErr != nil {
78+
return nil, fmt.Errorf("move COW to canonical path: %w", renameErr)
7779
}
7880

79-
// Rebuild storage configs: read-only layers from snapshot config (via blob IDs),
80-
// plus the new COW disk.
81-
storageConfigs, bootCfg, blobIDs, err := fc.rebuildFromSnapshot(ctx, vmID, vmCfg, cowPath)
82-
if err != nil {
83-
return nil, fmt.Errorf("rebuild from snapshot: %w", err)
84-
}
81+
// Rebuild storage configs: reuse layer paths from metadata, update COW path.
82+
storageConfigs := rebuildCloneStorage(meta, cowPath)
83+
bootCfg := meta.BootConfig
84+
blobIDs := hypervisor.ExtractBlobIDs(storageConfigs, bootCfg)
8585

8686
if verifyErr := verifyBaseFiles(storageConfigs, bootCfg); verifyErr != nil {
8787
return nil, fmt.Errorf("verify base files: %w", verifyErr)
8888
}
8989

90-
// Expand COW if vmCfg requests larger storage.
9190
if vmCfg.Storage > 0 {
9291
if expandErr := expandRawImage(cowPath, vmCfg.Storage); expandErr != nil {
9392
return nil, fmt.Errorf("resize COW: %w", expandErr)
9493
}
9594
}
9695

97-
// Update bootCfg.Cmdline for the new clone (new VM name, IP, DNS).
9896
if bootCfg != nil {
9997
dns, dnsErr := fc.conf.DNSServers()
10098
if dnsErr != nil {
@@ -103,9 +101,13 @@ func (fc *Firecracker) cloneAfterExtract(ctx context.Context, vmID string, vmCfg
103101
bootCfg.Cmdline = buildCmdline(storageConfigs, networkConfigs, vmCfg.Name, dns)
104102
}
105103

106-
// Launch FC process, load snapshot, configure drives, resume.
107-
sockPath := hypervisor.SocketPath(runDir)
104+
// FC snapshot/load requires drives at the same paths as the source.
105+
// Read-only layers are shared blobs (same path). COW changed path.
106+
// Create a temp symlink from the source COW path -> clone COW so load succeeds.
107+
symlinks := createDriveSymlinks(meta.StorageConfigs, storageConfigs)
108+
defer cleanupSymlinks(symlinks)
108109

110+
sockPath := hypervisor.SocketPath(runDir)
109111
withNetwork := len(networkConfigs) > 0
110112
pid, err := fc.launchProcess(ctx, &hypervisor.VMRecord{
111113
VM: types.VM{NetworkConfigs: networkConfigs},
@@ -121,16 +123,10 @@ func (fc *Firecracker) cloneAfterExtract(ctx context.Context, vmID string, vmCfg
121123
return nil, err
122124
}
123125

124-
// Finalize record -> Running.
125126
info := types.VM{
126-
ID: vmID,
127-
State: types.VMStateRunning,
128-
Config: *vmCfg,
129-
StorageConfigs: storageConfigs,
130-
NetworkConfigs: networkConfigs,
131-
CreatedAt: now,
132-
UpdatedAt: now,
133-
StartedAt: &now,
127+
ID: vmID, State: types.VMStateRunning,
128+
Config: *vmCfg, StorageConfigs: storageConfigs, NetworkConfigs: networkConfigs,
129+
CreatedAt: now, UpdatedAt: now, StartedAt: &now,
134130
}
135131
if err := fc.DB.Update(ctx, func(idx *hypervisor.VMIndex) error {
136132
r := idx.VMs[vmID]
@@ -169,13 +165,9 @@ func (fc *Firecracker) restoreAndResumeClone(
169165
return fmt.Errorf("snapshot/load: %w", err)
170166
}
171167

172-
// Re-configure drives after snapshot load.
173-
// FC snapshot/load does NOT preserve drive config; drives must be re-attached.
174168
if err = fc.reconfigureDrives(ctx, hc, storageConfigs); err != nil {
175169
return fmt.Errorf("reconfigure drives: %w", err)
176170
}
177-
178-
// Re-configure network interfaces for the clone (new TAP devices, new MACs).
179171
if err = fc.reconfigureNetworks(ctx, hc, networkConfigs); err != nil {
180172
return fmt.Errorf("reconfigure networks: %w", err)
181173
}
@@ -186,56 +178,50 @@ func (fc *Firecracker) restoreAndResumeClone(
186178
return nil
187179
}
188180

189-
// rebuildFromSnapshot reconstructs StorageConfigs, BootConfig, and blob IDs
190-
// from the VM's image (looked up via vmCfg.Image) plus the new COW path.
191-
// FC only supports OCI (direct boot), so we always have a kernel+initrd+layers.
192-
func (fc *Firecracker) rebuildFromSnapshot(ctx context.Context, _ string, vmCfg *types.VMConfig, cowPath string) ([]*types.StorageConfig, *types.BootConfig, map[string]struct{}, error) {
193-
// Look up the original VM that was snapshotted to find its storage layout.
194-
// For clone, the snapshot already carried the COW; we need the read-only layers
195-
// which are shared blobs on disk (referenced by the image).
196-
// The caller (cmd layer) already resolved the image and passed storageConfigs
197-
// via snapshotConfig.ImageBlobIDs. We reconstruct from the index.
198-
199-
// Search for any existing VM with the same image to get layer paths.
200-
// This is a fallback; the primary path is through the image resolver at the cmd layer.
201-
var storageConfigs []*types.StorageConfig
202-
var bootCfg *types.BootConfig
203-
204-
if err := fc.DB.With(ctx, func(idx *hypervisor.VMIndex) error {
205-
for _, rec := range idx.VMs {
206-
if rec == nil || rec.Config.Image != vmCfg.Image {
207-
continue
208-
}
209-
// Found a VM with the same image; reuse its read-only layers and boot config.
210-
for _, sc := range rec.StorageConfigs {
211-
if sc.RO {
212-
storageConfigs = append(storageConfigs, &types.StorageConfig{
213-
Path: sc.Path,
214-
RO: true,
215-
Serial: sc.Serial,
216-
})
181+
// rebuildCloneStorage creates new StorageConfigs from snapshot metadata,
182+
// keeping read-only layer paths unchanged and updating the COW path.
183+
func rebuildCloneStorage(meta *snapshotMeta, cowPath string) []*types.StorageConfig {
184+
var configs []*types.StorageConfig
185+
for _, sc := range meta.StorageConfigs {
186+
if sc.RO {
187+
configs = append(configs, &types.StorageConfig{Path: sc.Path, RO: true, Serial: sc.Serial})
188+
}
189+
}
190+
configs = append(configs, &types.StorageConfig{Path: cowPath, RO: false, Serial: CowSerial})
191+
return configs
192+
}
193+
194+
// createDriveSymlinks creates temporary symlinks from source drive paths to
195+
// clone drive paths so FC snapshot/load can find drives at their original locations.
196+
// Only creates symlinks for paths that actually changed (i.e., COW disk).
197+
func createDriveSymlinks(srcConfigs, dstConfigs []*types.StorageConfig) []string {
198+
var symlinks []string
199+
dstPaths := make(map[string]string) // srcPath → dstPath
200+
for i, src := range srcConfigs {
201+
if i < len(dstConfigs) && src.Path != dstConfigs[i].Path {
202+
dstPaths[src.Path] = dstConfigs[i].Path
203+
}
204+
}
205+
for srcPath, dstPath := range dstPaths {
206+
// Only create if source path doesn't already exist (avoid overwriting real files).
207+
if _, err := os.Stat(srcPath); err != nil {
208+
// Ensure parent directory exists for the symlink.
209+
if mkErr := os.MkdirAll(filepath.Dir(srcPath), 0o700); mkErr == nil {
210+
if linkErr := os.Symlink(dstPath, srcPath); linkErr == nil {
211+
symlinks = append(symlinks, srcPath)
217212
}
218213
}
219-
if rec.BootConfig != nil {
220-
b := *rec.BootConfig
221-
bootCfg = &b
222-
}
223-
return nil
224214
}
225-
return fmt.Errorf("no VM with image %q found for layer reference", vmCfg.Image)
226-
}); err != nil {
227-
return nil, nil, nil, err
228215
}
216+
return symlinks
217+
}
229218

230-
// Append the new COW disk.
231-
storageConfigs = append(storageConfigs, &types.StorageConfig{
232-
Path: cowPath,
233-
RO: false,
234-
Serial: CowSerial,
235-
})
236-
237-
blobIDs := hypervisor.ExtractBlobIDs(storageConfigs, bootCfg)
238-
return storageConfigs, bootCfg, blobIDs, nil
219+
func cleanupSymlinks(paths []string) {
220+
for _, p := range paths {
221+
_ = os.Remove(p)
222+
// Clean up parent dir if it was created for the symlink (best-effort).
223+
_ = os.Remove(filepath.Dir(p))
224+
}
239225
}
240226

241227
// reconfigureDrives re-attaches drives after FC snapshot/load.

hypervisor/firecracker/firecracker.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"fmt"
77

88
"github.com/cocoonstack/cocoon/config"
9-
"github.com/cocoonstack/cocoon/gc"
109
"github.com/cocoonstack/cocoon/hypervisor"
1110
"github.com/cocoonstack/cocoon/lock/flock"
1211
storejson "github.com/cocoonstack/cocoon/storage/json"
@@ -83,5 +82,3 @@ func (fc *Firecracker) Delete(ctx context.Context, refs []string, force bool) ([
8382
})
8483
})
8584
}
86-
87-
func (fc *Firecracker) RegisterGC(_ *gc.Orchestrator) {}

hypervisor/firecracker/gc.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package firecracker
2+
3+
import (
4+
"context"
5+
"errors"
6+
"slices"
7+
"time"
8+
9+
"github.com/cocoonstack/cocoon/gc"
10+
"github.com/cocoonstack/cocoon/hypervisor"
11+
"github.com/cocoonstack/cocoon/types"
12+
"github.com/cocoonstack/cocoon/utils"
13+
)
14+
15+
// creatingStateGCGrace is how long a record may remain in the Creating
// state before GC treats it as a stale placeholder.
const creatingStateGCGrace = 24 * time.Hour

// fcSnapshot is a point-in-time view of Firecracker state consumed by the
// GC orchestrator: which blobs are pinned, which VMs are alive, and which
// directories/records look orphaned.
type fcSnapshot struct {
	blobIDs     map[string]struct{} // blob IDs referenced by any VM record
	vmIDs       map[string]struct{} // IDs of every VM present in the index
	staleCreate []string            // VMs stuck in Creating past the grace period
	runDirs     []string            // subdirectories found under the run dir
	logDirs     []string            // subdirectories found under the log dir
}

// UsedBlobIDs reports the blob IDs that must survive garbage collection.
func (s fcSnapshot) UsedBlobIDs() map[string]struct{} {
	return s.blobIDs
}

// ActiveVMIDs reports the VM IDs currently known to this backend.
func (s fcSnapshot) ActiveVMIDs() map[string]struct{} {
	return s.vmIDs
}
27+
28+
// GCModule returns the GC module for cross-module blob pinning and orphan cleanup.
29+
func (fc *Firecracker) GCModule() gc.Module[fcSnapshot] {
30+
return gc.Module[fcSnapshot]{
31+
Name: typ,
32+
Locker: fc.Locker,
33+
ReadDB: func(_ context.Context) (fcSnapshot, error) {
34+
var snap fcSnapshot
35+
cutoff := time.Now().Add(-creatingStateGCGrace)
36+
if err := fc.DB.ReadRaw(func(idx *hypervisor.VMIndex) error {
37+
snap.blobIDs = make(map[string]struct{})
38+
snap.vmIDs = make(map[string]struct{})
39+
for id, rec := range idx.VMs {
40+
if rec == nil {
41+
continue
42+
}
43+
snap.vmIDs[id] = struct{}{}
44+
for hex := range rec.ImageBlobIDs {
45+
snap.blobIDs[hex] = struct{}{}
46+
}
47+
if rec.State == types.VMStateCreating && rec.UpdatedAt.Before(cutoff) {
48+
snap.staleCreate = append(snap.staleCreate, id)
49+
}
50+
}
51+
return nil
52+
}); err != nil {
53+
return snap, err
54+
}
55+
var err error
56+
if snap.runDirs, err = utils.ScanSubdirs(fc.conf.RunDir()); err != nil {
57+
return snap, err
58+
}
59+
if snap.logDirs, err = utils.ScanSubdirs(fc.conf.LogDir()); err != nil {
60+
return snap, err
61+
}
62+
return snap, nil
63+
},
64+
Resolve: func(snap fcSnapshot, _ map[string]any) []string {
65+
reserved := map[string]struct{}{"db": {}}
66+
runOrphans := utils.FilterUnreferenced(snap.runDirs, snap.vmIDs, reserved)
67+
logOrphans := utils.FilterUnreferenced(snap.logDirs, snap.vmIDs, reserved)
68+
candidates := slices.Concat(runOrphans, logOrphans, snap.staleCreate)
69+
slices.Sort(candidates)
70+
return slices.Compact(candidates)
71+
},
72+
Collect: func(ctx context.Context, ids []string) error {
73+
var errs []error
74+
for _, id := range ids {
75+
runDir, logDir := fc.conf.VMRunDir(id), fc.conf.VMLogDir(id)
76+
if rec, loadErr := fc.LoadRecord(ctx, id); loadErr == nil {
77+
runDir, logDir = rec.RunDir, rec.LogDir
78+
}
79+
if err := hypervisor.RemoveVMDirs(runDir, logDir); err != nil {
80+
errs = append(errs, err)
81+
}
82+
}
83+
if err := fc.cleanStalePlaceholders(ctx, ids); err != nil {
84+
errs = append(errs, err)
85+
}
86+
return errors.Join(errs...)
87+
},
88+
}
89+
}
90+
91+
// RegisterGC registers the Firecracker GC module with the given Orchestrator.
92+
func (fc *Firecracker) RegisterGC(orch *gc.Orchestrator) {
93+
gc.Register(orch, fc.GCModule())
94+
}
95+
96+
func (fc *Firecracker) cleanStalePlaceholders(_ context.Context, ids []string) error {
97+
if len(ids) == 0 {
98+
return nil
99+
}
100+
cutoff := time.Now().Add(-creatingStateGCGrace)
101+
return fc.DB.WriteRaw(func(idx *hypervisor.VMIndex) error {
102+
utils.CleanStaleRecords(idx.VMs, idx.Names, ids,
103+
func(r *hypervisor.VMRecord) string { return r.Config.Name },
104+
func(r *hypervisor.VMRecord) bool {
105+
return r.State == types.VMStateCreating && r.UpdatedAt.Before(cutoff)
106+
},
107+
)
108+
return nil
109+
})
110+
}

hypervisor/firecracker/relay.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,13 @@ func relayBidirectional(ctx context.Context, a io.ReadWriter, b io.ReadWriteClos
9999
case <-done:
100100
case <-ctx.Done():
101101
}
102-
// Close the conn to unblock the other goroutine's io.Copy.
102+
// Close the conn to unblock conn→master io.Copy.
103+
// The master→conn io.Copy may remain blocked on PTY read until the
104+
// guest writes to serial or FC exits. Use a timeout to avoid blocking
105+
// the accept loop indefinitely.
103106
_ = b.Close()
104-
<-done // wait for the second goroutine
107+
select {
108+
case <-done:
109+
case <-time.After(3 * time.Second): //nolint:mnd
110+
}
105111
}

hypervisor/firecracker/snapshot.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package firecracker
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"io"
78
"maps"
@@ -18,6 +19,7 @@ import (
1819
const (
1920
snapshotVMStateFile = "vmstate"
2021
snapshotMemFile = "mem"
22+
snapshotMetaFile = "cocoon.json"
2123
)
2224

2325
// Snapshot pauses the VM, captures its full state (CPU/device state via FC
@@ -88,6 +90,13 @@ func (fc *Firecracker) Snapshot(ctx context.Context, ref string) (*types.Snapsho
8890
return nil, nil, fmt.Errorf("snapshot VM %s: %w", vmID, err)
8991
}
9092

93+
// Save snapshot metadata so clones can reconstruct storage/boot config
94+
// without depending on live VM records.
95+
if metaErr := saveSnapshotMeta(tmpDir, rec.StorageConfigs, rec.BootConfig); metaErr != nil {
96+
os.RemoveAll(tmpDir) //nolint:errcheck,gosec
97+
return nil, nil, fmt.Errorf("save snapshot metadata: %w", metaErr)
98+
}
99+
91100
// Generate snapshot ID and record it on the VM atomically.
92101
snapID, genErr := utils.GenerateID()
93102
if genErr != nil {
@@ -126,3 +135,31 @@ func (fc *Firecracker) Snapshot(ctx context.Context, ref string) (*types.Snapsho
126135

127136
return cfg, utils.TarDirStreamWithRemove(tmpDir), nil
128137
}
138+
139+
// snapshotMeta is persisted as cocoon.json inside the snapshot tar.
140+
// It makes the snapshot self-contained: clones can reconstruct storage/boot
141+
// config without depending on live VM records or image backends.
142+
type snapshotMeta struct {
143+
StorageConfigs []*types.StorageConfig `json:"storage_configs"`
144+
BootConfig *types.BootConfig `json:"boot_config,omitempty"`
145+
}
146+
147+
func saveSnapshotMeta(dir string, storageConfigs []*types.StorageConfig, boot *types.BootConfig) error {
148+
data, err := json.Marshal(snapshotMeta{StorageConfigs: storageConfigs, BootConfig: boot})
149+
if err != nil {
150+
return fmt.Errorf("marshal: %w", err)
151+
}
152+
return os.WriteFile(filepath.Join(dir, snapshotMetaFile), data, 0o600)
153+
}
154+
155+
func loadSnapshotMeta(dir string) (*snapshotMeta, error) {
156+
data, err := os.ReadFile(filepath.Join(dir, snapshotMetaFile)) //nolint:gosec
157+
if err != nil {
158+
return nil, fmt.Errorf("read %s: %w", snapshotMetaFile, err)
159+
}
160+
var meta snapshotMeta
161+
if err := json.Unmarshal(data, &meta); err != nil {
162+
return nil, fmt.Errorf("decode %s: %w", snapshotMetaFile, err)
163+
}
164+
return &meta, nil
165+
}

0 commit comments

Comments
 (0)