From 39797b5be9e4369dd8952f3f9aef0ba1c0b801aa Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Tue, 10 Feb 2026 15:23:43 -0500 Subject: [PATCH 01/24] refactor: cross-platform foundation for macOS support Split platform-specific code into _linux.go and _darwin.go files across resources, network, devices, ingress, vmm, and vm_metrics packages. Add hypervisor abstraction with registration pattern (RegisterSocketName, RegisterVsockDialerFactory, RegisterClientFactory) to decouple instance management from specific hypervisor implementations. Add "vz" to the OpenAPI hypervisor type enum, erofs disk format support, and insecure registry option for builds. No behavioral changes on Linux. macOS can now compile but has no VM functionality yet. Co-Authored-By: Claude Opus 4.6 --- lib/resources/disk_darwin.go | 49 ++++++++++++++++++++++++++++++++++++ lib/resources/disk_linux.go | 42 +++++++++++++++++++++++++++++++ lib/system/initrd.go | 8 +++--- 3 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 lib/resources/disk_darwin.go create mode 100644 lib/resources/disk_linux.go diff --git a/lib/resources/disk_darwin.go b/lib/resources/disk_darwin.go new file mode 100644 index 00000000..8a5d32f3 --- /dev/null +++ b/lib/resources/disk_darwin.go @@ -0,0 +1,49 @@ +//go:build darwin + +package resources + +import ( + "os" + + "github.com/c2h5oh/datasize" + "github.com/kernel/hypeman/cmd/api/config" + "github.com/kernel/hypeman/lib/paths" + "golang.org/x/sys/unix" +) + +// NewDiskResource discovers disk capacity on macOS. +func NewDiskResource(cfg *config.Config, p *paths.Paths, instLister InstanceLister, imgLister ImageLister, volLister VolumeLister) (*DiskResource, error) { + var capacity int64 + + if cfg.DiskLimit != "" { + // Parse configured limit + var ds datasize.ByteSize + if err := ds.UnmarshalText([]byte(cfg.DiskLimit)); err != nil { + return nil, err + } + capacity = int64(ds.Bytes()) + } else { + // Auto-detect from filesystem using statfs + var stat unix.Statfs_t + dataDir := cfg.DataDir + if err := unix.Statfs(dataDir, &stat); err != nil { + // Fallback: try to stat the root if data dir doesn't exist yet + if os.IsNotExist(err) { + if err := unix.Statfs("/", &stat); err != nil { + return nil, err + } + } else { + return nil, err + } + } + capacity = int64(stat.Blocks) * int64(stat.Bsize) + } + + return &DiskResource{ + capacity: capacity, + dataDir: cfg.DataDir, + instanceLister: instLister, + imageLister: imgLister, + volumeLister: volLister, + }, nil +} diff --git a/lib/resources/disk_linux.go b/lib/resources/disk_linux.go new file mode 100644 index 00000000..e6cc8fb1 --- /dev/null +++ b/lib/resources/disk_linux.go @@ -0,0 +1,42 @@ +//go:build linux + +package resources + +import ( + "syscall" + + "github.com/c2h5oh/datasize" + "github.com/kernel/hypeman/cmd/api/config" + "github.com/kernel/hypeman/lib/paths" +) + +// NewDiskResource discovers disk capacity for the data directory. +// If cfg.DiskLimit is set, uses that as capacity; otherwise auto-detects via statfs. 
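+// For example, cfg.DiskLimit = "100GB" yields a capacity of 100 << 30 bytes
+// (the datasize package uses 1024-based units, so its "GB" is a gibibyte).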
+func NewDiskResource(cfg *config.Config, p *paths.Paths, instLister InstanceLister, imgLister ImageLister, volLister VolumeLister) (*DiskResource, error) { + var capacity int64 + + if cfg.DiskLimit != "" { + // Parse configured limit + var ds datasize.ByteSize + if err := ds.UnmarshalText([]byte(cfg.DiskLimit)); err != nil { + return nil, err + } + capacity = int64(ds.Bytes()) + } else { + // Auto-detect from filesystem + var stat syscall.Statfs_t + if err := syscall.Statfs(cfg.DataDir, &stat); err != nil { + return nil, err + } + // Total space = blocks * block size + capacity = int64(stat.Blocks) * int64(stat.Bsize) + } + + return &DiskResource{ + capacity: capacity, + dataDir: cfg.DataDir, + instanceLister: instLister, + imageLister: imgLister, + volumeLister: volLister, + }, nil +} diff --git a/lib/system/initrd.go b/lib/system/initrd.go index 22f64713..3ef4c103 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -35,14 +35,14 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) return "", fmt.Errorf("create oci client: %w", err) } - // Inspect Alpine base to get digest - digest, err := ociClient.InspectManifest(ctx, alpineBaseImage) + // Inspect Alpine base to get digest (always use Linux platform since this is for guest VMs) + digest, err := ociClient.InspectManifestForLinux(ctx, alpineBaseImage) if err != nil { return "", fmt.Errorf("inspect alpine manifest: %w", err) } - // Pull and unpack Alpine base - if err := ociClient.PullAndUnpack(ctx, alpineBaseImage, digest, rootfsDir); err != nil { + // Pull and unpack Alpine base (always use Linux platform since this is for guest VMs) + if err := ociClient.PullAndUnpackForLinux(ctx, alpineBaseImage, digest, rootfsDir); err != nil { return "", fmt.Errorf("pull alpine base: %w", err) } From 10836fe3163972c349c3427fdcef27e7fb33aadd Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Tue, 10 Feb 2026 15:26:23 -0500 Subject: [PATCH 02/24] feat: add macOS VM support via Apple Virtualization.framework Add vz hypervisor implementation that runs VMs on macOS using Apple's Virtualization.framework via a codesigned subprocess (vz-shim). Includes vsock-based guest communication, shared directory mounts for disk access, and macOS-native networking via vmnet. 
Key components: - cmd/vz-shim: subprocess that creates and manages vz VMs - lib/hypervisor/vz: starter, client, and vsock dialer for vz - Makefile targets: build-darwin, test-darwin, dev-darwin, sign-darwin - CI: macOS runner for test-darwin - scripts/install.sh: macOS support (launchd, Homebrew, codesign) Co-Authored-By: Claude Opus 4.6 --- .air.darwin.toml | 48 +++ .env.darwin.example | 122 ++++++ .github/workflows/test.yml | 61 +++ DEVELOPMENT.md | 229 +++++++++- Makefile | 108 ++++- README.md | 61 ++- cmd/api/api/cp.go | 11 +- cmd/api/api/cp_test.go | 4 +- cmd/api/api/exec.go | 6 +- cmd/api/api/exec_test.go | 4 +- cmd/api/api/images_test.go | 5 +- cmd/api/api/instances.go | 7 +- cmd/api/api/instances_test.go | 2 +- cmd/gen-jwt/main.go | 9 + cmd/vz-shim/main.go | 163 ++++++++ cmd/vz-shim/server.go | 260 ++++++++++++ cmd/vz-shim/vm.go | 276 +++++++++++++ go.mod | 2 + go.sum | 4 + lib/hypervisor/README.md | 28 +- lib/hypervisor/vz/client.go | 172 ++++++++ lib/hypervisor/vz/shimconfig/config.go | 44 ++ lib/hypervisor/vz/starter.go | 202 +++++++++ lib/hypervisor/vz/vsock.go | 111 +++++ lib/hypervisor/vz/vz-shim/.gitkeep | 0 lib/hypervisor/vz/vz_shim_binary.go | 11 + lib/instances/README.md | 8 +- lib/instances/create.go | 16 +- lib/network/README.md | 17 +- scripts/e2e-install-test.sh | 141 +++++++ scripts/install.sh | 550 ++++++++++++++++++------- scripts/uninstall.sh | 184 ++++++--- vz.entitlements | 14 + 33 files changed, 2634 insertions(+), 246 deletions(-) create mode 100644 .air.darwin.toml create mode 100644 .env.darwin.example create mode 100644 cmd/vz-shim/main.go create mode 100644 cmd/vz-shim/server.go create mode 100644 cmd/vz-shim/vm.go create mode 100644 lib/hypervisor/vz/client.go create mode 100644 lib/hypervisor/vz/shimconfig/config.go create mode 100644 lib/hypervisor/vz/starter.go create mode 100644 lib/hypervisor/vz/vsock.go create mode 100644 lib/hypervisor/vz/vz-shim/.gitkeep create mode 100644 lib/hypervisor/vz/vz_shim_binary.go create mode 100755 scripts/e2e-install-test.sh create mode 100644 vz.entitlements diff --git a/.air.darwin.toml b/.air.darwin.toml new file mode 100644 index 00000000..ded73f54 --- /dev/null +++ b/.air.darwin.toml @@ -0,0 +1,48 @@ +root = "." 
+testdata_dir = "testdata" +tmp_dir = "tmp" + +[build] + args_bin = [] + bin = "./tmp/main" + # Build for macOS with vz support, then sign with entitlements + # Also builds and signs vz-shim (subprocess that hosts vz VMs) + cmd = "make build-embedded && go build -o ./tmp/vz-shim ./cmd/vz-shim && codesign --sign - --entitlements vz.entitlements --force ./tmp/vz-shim && go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && codesign --sign - --entitlements vz.entitlements --force ./tmp/main" + delay = 1000 + exclude_dir = ["assets", "tmp", "vendor", "testdata", "bin", "scripts", "data", "kernel"] + exclude_file = [] + exclude_regex = ["_test.go"] + exclude_unchanged = false + follow_symlink = false + # No sudo needed on macOS - vz doesn't require root + full_bin = "./tmp/main" + include_dir = [] + include_ext = ["go", "tpl", "tmpl", "html", "yaml"] + include_file = [] + log = "build-errors.log" + poll = false + poll_interval = 0 + post_cmd = [] + kill_delay = '1s' + rerun = false + rerun_delay = 500 + send_interrupt = true + stop_on_error = false + +[color] + app = "" + build = "yellow" + main = "magenta" + runner = "green" + watcher = "cyan" + +[log] + main_only = false + time = false + +[misc] + clean_on_exit = false + +[screen] + clear_on_rebuild = false + keep_scroll = true diff --git a/.env.darwin.example b/.env.darwin.example new file mode 100644 index 00000000..f714f06e --- /dev/null +++ b/.env.darwin.example @@ -0,0 +1,122 @@ +# ============================================================================= +# macOS (Darwin) Configuration for Hypeman +# ============================================================================= +# Copy this file to .env and customize for your environment. +# +# Key differences from Linux (.env.example): +# - DEFAULT_HYPERVISOR: Use "vz" (Virtualization.framework) instead of cloud-hypervisor/qemu +# - DATA_DIR: Uses macOS conventions (~/Library/Application Support) +# - Network settings: BRIDGE_NAME, SUBNET_CIDR, etc. 
are IGNORED (vz uses NAT) +# - Rate limiting: Not supported on macOS (no tc/HTB equivalent) +# - GPU passthrough: Not supported on macOS +# ============================================================================= + +# Required +JWT_SECRET=dev-secret-change-me + +# Data directory - use macOS conventions +# Note: ~ expands to $HOME at runtime +DATA_DIR=~/Library/Application Support/hypeman + +# Server configuration +PORT=8080 + +# Logging +LOG_LEVEL=debug + +# ============================================================================= +# Hypervisor Configuration (IMPORTANT FOR MACOS) +# ============================================================================= +# On macOS, use "vz" (Virtualization.framework) +# - "cloud-hypervisor" and "qemu" are NOT supported on macOS +DEFAULT_HYPERVISOR=vz + +# ============================================================================= +# Network Configuration (DIFFERENT ON MACOS) +# ============================================================================= +# On macOS with vz, network is handled automatically via NAT: +# - VMs get IP addresses from 192.168.64.0/24 via DHCP +# - No TAP devices, bridges, or iptables needed +# - The following settings are IGNORED on macOS: +# BRIDGE_NAME, SUBNET_CIDR, SUBNET_GATEWAY, UPLINK_INTERFACE + +# DNS Server for VMs (used by guest for resolution) +DNS_SERVER=8.8.8.8 + +# ============================================================================= +# Caddy / Ingress Configuration +# ============================================================================= +CADDY_LISTEN_ADDRESS=0.0.0.0 +CADDY_ADMIN_ADDRESS=127.0.0.1 +CADDY_ADMIN_PORT=2019 +# Note: 5353 is used by mDNSResponder (Bonjour) on macOS, using 5354 instead +INTERNAL_DNS_PORT=5354 +CADDY_STOP_ON_SHUTDOWN=false + +# ============================================================================= +# Build System Configuration +# ============================================================================= +# For builds on macOS with vz, the registry URL needs to be accessible from +# NAT VMs. Since vz uses 192.168.64.0/24 for NAT, the host is at 192.168.64.1. +# +# IMPORTANT: "host.docker.internal" does NOT work in vz VMs - that's a Docker +# Desktop-specific hostname. Use the NAT gateway IP instead. 
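+#
+# To sanity-check the gateway on your machine: once a VM is running, the vmnet
+# NAT interface (typically bridge100) should hold 192.168.64.1, e.g.:
+#   ifconfig bridge100 | grep "inet "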
+# +# Registry URL (the host's hypeman API, accessible from VMs) +REGISTRY_URL=192.168.64.1:8080 +# Use HTTP (not HTTPS) since hypeman's internal registry uses plaintext +REGISTRY_INSECURE=true + +BUILDER_IMAGE=hypeman/builder:latest +MAX_CONCURRENT_SOURCE_BUILDS=2 +BUILD_TIMEOUT=600 + +# ============================================================================= +# Resource Limits (same as Linux) +# ============================================================================= +# Per-instance limits +MAX_VCPUS_PER_INSTANCE=4 +MAX_MEMORY_PER_INSTANCE=8GB + +# Aggregate limits (0 or empty = unlimited) +# MAX_TOTAL_VOLUME_STORAGE= + +# ============================================================================= +# OpenTelemetry (optional, same as Linux) +# ============================================================================= +# OTEL_ENABLED=false +# OTEL_ENDPOINT=127.0.0.1:4317 +# OTEL_SERVICE_NAME=hypeman +# OTEL_INSECURE=true +# ENV=dev + +# ============================================================================= +# TLS / ACME Configuration (same as Linux) +# ============================================================================= +# ACME_EMAIL=admin@example.com +# ACME_DNS_PROVIDER=cloudflare +# TLS_ALLOWED_DOMAINS=*.example.com +# CLOUDFLARE_API_TOKEN= + +# ============================================================================= +# macOS Limitations +# ============================================================================= +# The following features are NOT AVAILABLE on macOS: +# +# 1. GPU Passthrough (VFIO, mdev) +# - GPU_PROFILE_CACHE_TTL is ignored +# - Device registration/binding will fail +# +# 2. Network Rate Limiting +# - UPLOAD_BURST_MULTIPLIER, DOWNLOAD_BURST_MULTIPLIER are ignored +# - No tc/HTB equivalent on macOS +# +# 3. CPU/Memory Hotplug +# - Resize operations not supported +# +# 4. Disk I/O Limiting +# - DISK_IO_LIMIT, OVERSUB_DISK_IO are ignored +# +# 5. 
Snapshots (requires macOS 14+ on Apple Silicon)
+#    - SaveMachineStateToPath/RestoreMachineStateFromURL require macOS 14+
+#    - Only supported on ARM64 (Apple Silicon) Macs
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ee4f8a75..9ff9862d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -55,3 +55,64 @@ jobs:
         TLS_TEST_DOMAIN: "test.hypeman-development.com"
         TLS_ALLOWED_DOMAINS: '*.hypeman-development.com'
       run: make test
+
+  test-darwin:
+    runs-on: [self-hosted, macos, arm64]
+    concurrency:
+      group: macos-ci-test-${{ github.ref }}
+      cancel-in-progress: true
+    env:
+      DATA_DIR: /tmp/hypeman-ci-${{ github.run_id }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v6
+        with:
+          go-version: '1.25'
+          cache: false
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Install dependencies
+        run: |
+          brew list e2fsprogs &>/dev/null || brew install e2fsprogs
+          brew list erofs-utils &>/dev/null || brew install erofs-utils
+          go mod download
+      - name: Create run-scoped data directory
+        run: mkdir -p "$DATA_DIR"
+      - name: Generate OpenAPI code
+        run: make oapi-generate
+      - name: Build
+        run: make build
+      - name: Run tests
+        env:
+          DEFAULT_HYPERVISOR: vz
+          JWT_SECRET: ci-test-secret
+        run: make test
+      - name: Cleanup
+        if: always()
+        run: |
+          pkill -f "vz-shim.*$DATA_DIR" || true
+          rm -rf "$DATA_DIR"
+          make clean
+
+  e2e-install:
+    runs-on: [self-hosted, macos, arm64]
+    needs: test-darwin
+    concurrency:
+      group: macos-ci-e2e-${{ github.ref }}
+      cancel-in-progress: true
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v6
+        with:
+          go-version: '1.25'
+          cache: false
+      - name: Install dependencies
+        run: brew list caddy &>/dev/null || brew install caddy
+      - name: Run E2E install test
+        run: bash scripts/e2e-install-test.sh
+      - name: Cleanup on failure
+        if: failure()
+        run: bash scripts/uninstall.sh || true
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index 85a14f8b..857cb954 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -4,7 +4,17 @@ This document covers development setup, configuration, and contributing to Hypem
 
 ## Prerequisites
 
-> **macOS Users:** Hypeman requires KVM, which is only available on Linux. See [scripts/utm/README.md](scripts/utm/README.md) for instructions on setting up a Linux VM with nested virtualization on Apple Silicon Macs.
+### Linux (Default)
+
+See the Linux prerequisites below.
+
+### macOS (Experimental)
+
+See [macOS Development](#macos-development) below for native macOS development using Virtualization.framework.
+
+---
+
+**Linux Prerequisites:**
+
 
 **Go 1.25.4+**, **KVM**, **erofs-utils**, **dnsmasq**
 
@@ -111,6 +121,7 @@ Hypeman can be configured using the following environment variables:
 | `DNS_PROPAGATION_TIMEOUT` | Max time to wait for DNS propagation (e.g., `2m`) | _(empty)_ |
 | `DNS_RESOLVERS` | Comma-separated DNS resolvers for propagation checking | _(empty)_ |
 | `CLOUDFLARE_API_TOKEN` | Cloudflare API token (when using `cloudflare` provider) | _(empty)_ |
+| `DOCKER_SOCKET` | Path to Docker socket (for builder image builds) | `/var/run/docker.sock` |
 
 **Important: Subnet Configuration**
 
@@ -244,6 +255,49 @@ make dev
 
 The server will start on port 8080 (configurable via `PORT` environment variable).
 
+### Setting Up the Builder Image (for Dockerfile builds)
+
+The builder image is required for `hypeman build` to work.
There are two modes: + +**Automatic mode (default):** When `BUILDER_IMAGE` is unset or empty, the server will automatically build and push the builder image on startup using Docker. This is the easiest way to get started — just ensure Docker is available and run `make dev`. If a build is requested while the builder image is still being prepared, the server returns a clear error asking you to retry shortly. + +On macOS with Colima, set the Docker socket path: +```bash +DOCKER_SOCKET=$HOME/.colima/default/docker.sock +``` + +**Manual mode:** When `BUILDER_IMAGE` is explicitly set, the server assumes you manage your own image. Follow these steps: + +1. **Build the builder image** (requires Docker): + ```bash + docker build -t hypeman/builder:latest -f lib/builds/images/generic/Dockerfile . + ``` + +2. **Start the Hypeman server** (if not already running): + ```bash + make dev + ``` + +3. **Push to Hypeman's internal registry**: + ```bash + # Generate a token with registry push permissions + export JWT_SECRET="dev-secret-for-local-testing" + export HYPEMAN_API_KEY=$(go run ./cmd/gen-jwt -registry-push "hypeman/builder") + export HYPEMAN_BASE_URL="http://localhost:8080" + + # Push using hypeman-cli + hypeman push hypeman/builder:latest + ``` + +4. **Configure the builder image** in `.env`: + ```bash + BUILDER_IMAGE=localhost:8080/hypeman/builder:latest + ``` + +5. **Restart the server** to pick up the new config. + +Now `hypeman build ` will work for Dockerfile-based builds. + ### Local OpenTelemetry (optional) To collect traces and metrics locally, run the Grafana LGTM stack (Loki, Grafana, Tempo, Mimir): @@ -314,3 +368,176 @@ Or generate everything at once: ```bash make generate-all ``` + +## macOS Development + +Hypeman supports native macOS development using Apple's Virtualization.framework (via the `vz` hypervisor). + +### Requirements + +- **macOS 11.0+** (Big Sur or later) +- **Apple Silicon** (M1/M2/M3) recommended +- **macOS 14.0+** (Sonoma) required for snapshot/restore (ARM64 only) +- **Go 1.25.4+** +- **Caddy** (for ingress): `brew install caddy` +- **e2fsprogs** (for ext4 disk images): `brew install e2fsprogs` + +### Quick Start + +```bash +# 1. Install dependencies +brew install caddy e2fsprogs + +# 2. Add e2fsprogs to PATH (it's keg-only) +export PATH="/opt/homebrew/opt/e2fsprogs/bin:/opt/homebrew/opt/e2fsprogs/sbin:$PATH" +# Add to ~/.zshrc for persistence + +# 3. Configure environment +cp .env.darwin.example .env +# Edit .env as needed (defaults work for local development) + +# 4. Create data directory +mkdir -p ~/Library/Application\ Support/hypeman + +# 5. 
Run in development mode (auto-detects macOS, builds, signs, and runs with hot reload)
+make dev
+```
+
+The `make dev` command automatically detects macOS and:
+- Builds with vz support
+- Signs with required entitlements
+- Runs with hot reload (no sudo required)
+
+### Alternative Commands
+
+```bash
+# Build and sign only (no hot reload)
+make sign-darwin
+
+# Verify entitlements are correct
+make verify-entitlements
+
+# Run manually after signing
+./bin/hypeman
+```
+
+### Key Differences from Linux Development
+
+| Aspect | Linux | macOS |
+|--------|-------|-------|
+| Hypervisor | Cloud Hypervisor, QEMU | vz (Virtualization.framework) |
+| Binary signing | Not required | Automatic via `make dev` or `make sign-darwin` |
+| Networking | TAP + bridge + iptables | Automatic NAT (no setup needed) |
+| Root/sudo | Required for networking | Not required |
+| Caddy | Embedded binary | Install via `brew install caddy` |
+| DNS port | 5353 | 5354 (avoids mDNSResponder conflict) |
+
+### macOS-Specific Configuration
+
+The following environment variables work differently on macOS (see `.env.darwin.example`):
+
+| Variable | Linux | macOS |
+|----------|-------|-------|
+| `DEFAULT_HYPERVISOR` | `cloud-hypervisor` | `vz` |
+| `DATA_DIR` | `/var/lib/hypeman` | `~/Library/Application Support/hypeman` |
+| `INTERNAL_DNS_PORT` | `5353` | `5354` (5353 is used by mDNSResponder) |
+| `BRIDGE_NAME` | Used | Ignored (NAT) |
+| `SUBNET_CIDR` | Used | Ignored (NAT) |
+| `UPLINK_INTERFACE` | Used | Ignored (NAT) |
+| Network rate limiting | Supported | Not supported |
+| GPU passthrough | Supported (VFIO) | Not supported |
+
+### Code Organization
+
+Platform-specific code uses Go build tags:
+
+```
+lib/network/
+├── bridge_linux.go      # Linux networking (TAP, bridges, iptables)
+├── bridge_darwin.go     # macOS stubs (uses NAT)
+└── ip.go                # Shared utilities
+
+lib/devices/
+├── discovery_linux.go   # Linux PCI device discovery
+├── discovery_darwin.go  # macOS stubs (no passthrough)
+├── mdev_linux.go        # Linux vGPU (mdev)
+├── mdev_darwin.go       # macOS stubs
+├── vfio_linux.go        # Linux VFIO binding
+├── vfio_darwin.go       # macOS stubs
+└── types.go             # Shared types
+
+lib/hypervisor/
+├── cloudhypervisor/     # Cloud Hypervisor (Linux)
+├── qemu/                # QEMU (Linux, vsock_linux.go)
+└── vz/                  # Virtualization.framework (macOS only)
+    ├── starter.go       # VMStarter implementation
+    ├── client.go        # hypervisor.Hypervisor client (HTTP to vz-shim)
+    └── vsock.go         # VsockDialer via the shim's vsock proxy
+```
+
+### Testing on macOS
+
+```bash
+# Verify vz package compiles correctly
+make test-vz-compile
+
+# Run unit tests (Linux-specific tests like networking will be skipped)
+go test ./lib/hypervisor/vz/...
+go test ./lib/resources/...
+go test ./lib/images/...
+```
+
+Note: Full integration tests require Linux. On macOS, focus on unit tests and manual API testing.
+
+### Known Limitations
+
+1. **Disk Format**: vz only supports raw disk images (not qcow2). Convert images:
+   ```bash
+   qemu-img convert -f qcow2 -O raw disk.qcow2 disk.raw
+   ```
+
+2. **Snapshots**: Only available on macOS 14+ (Sonoma) on Apple Silicon:
+   ```go
+   // Check support at runtime
+   valid, err := vmConfig.ValidateSaveRestoreSupport()
+   ```
+
+3. **Network Ingress**: VMs get DHCP addresses from macOS NAT. To access a VM's services:
+   - Query the VM's IP via guest agent
+   - Use vsock for internal communication (no NAT traversal needed)
+
+4. **Shim Processes**: Each vz VM runs in its own codesigned `vz-shim` subprocess rather than inside the hypeman process, so VMs survive hypeman restarts; if a shim process dies, its VM stops with it.
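+
+For debugging guest connectivity you can talk to a VM's vsock proxy socket by
+hand, using the Cloud Hypervisor-style handshake the shim implements
+(`CONNECT {port}` / `OK {port}`). A minimal sketch; the socket path and the
+guest port below are illustrative assumptions, not fixed values:
+
+```go
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net"
+)
+
+func main() {
+	// Assumed path: adjust to your instance's vsock socket under DATA_DIR.
+	conn, err := net.Dial("unix", "/tmp/hypeman/instances/my-vm/vsock.sock")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer conn.Close()
+
+	// Handshake: request a guest port, then expect an "OK <port>" reply.
+	fmt.Fprintf(conn, "CONNECT %d\n", 1024) // 1024 is a placeholder port
+	resp, err := bufio.NewReader(conn).ReadString('\n')
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("handshake response: %q\n", resp)
+}
+```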
+ +### Troubleshooting + +**"binary needs to be signed with entitlements"** +```bash +make sign-darwin +# Or just use: make dev (handles signing automatically) +``` + +**"caddy binary is not embedded on macOS"** +```bash +brew install caddy +``` + +**"address already in use" on port 5353** +- Port 5353 is used by mDNSResponder (Bonjour) on macOS +- Use port 5354 instead: `INTERNAL_DNS_PORT=5354` in `.env` +- The `.env.darwin.example` already has this configured correctly + +**"Virtualization.framework is not available"** +- Ensure you're on macOS 11.0+ +- Check if virtualization is enabled in Recovery Mode settings + +**"snapshot not supported"** +- Requires macOS 14.0+ on Apple Silicon +- Check: `sw_vers` and `uname -m` (should be arm64) + +**VM fails to start** +- Check serial log: `$DATA_DIR/instances//serial.log` +- Ensure kernel and initrd paths are correct in config + +**IOMMU/VFIO warnings at startup** +- These are expected on macOS and can be ignored +- GPU passthrough is not supported on macOS diff --git a/Makefile b/Makefile index 53e92d72..06336dc3 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build build-linux build-darwin test test-linux test-darwin install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded +.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build build-linux test test-linux test-darwin install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded # Directory where local binaries will be installed BIN_DIR ?= $(CURDIR)/bin @@ -198,15 +198,17 @@ endif build-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded | $(BIN_DIR) go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api -# Build for macOS (no CH/Caddy needed; guest binaries cross-compiled for Linux) -build-darwin: build-embedded | $(BIN_DIR) - go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api - # Build all binaries build-all: build # Run in development mode with hot reload -dev: dev-linux +# On macOS, redirects to dev-darwin which uses vz instead of cloud-hypervisor +dev: + @if [ "$$(uname)" = "Darwin" ]; then \ + $(MAKE) dev-darwin; \ + else \ + $(MAKE) dev-linux; \ + fi # Linux development mode with hot reload dev-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded $(AIR) @@ -238,7 +240,7 @@ test-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded # Uses 'go list' to discover compilable packages, then filters out packages # whose test files reference Linux-only symbols (network, devices, system/init). 
DARWIN_EXCLUDE_PKGS := /lib/network|/lib/devices|/lib/system/init -test-darwin: build-embedded +test-darwin: build-embedded sign-vz-shim @VERBOSE_FLAG=""; \ if [ -n "$(VERBOSE)" ]; then VERBOSE_FLAG="-v"; fi; \ PKGS=$$(PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" \ @@ -282,3 +284,95 @@ clean: release-prep: download-ch-binaries build-caddy-binaries build-embedded go mod tidy +# ============================================================================= +# macOS (vz/Virtualization.framework) targets +# ============================================================================= + +# Entitlements file for macOS codesigning +ENTITLEMENTS_FILE ?= vz.entitlements + +# Build vz-shim (subprocess that hosts vz VMs) +# Also copies to embed directory so it gets embedded in the hypeman binary +.PHONY: build-vz-shim +build-vz-shim: | $(BIN_DIR) + @echo "Building vz-shim for macOS..." + go build -o $(BIN_DIR)/vz-shim ./cmd/vz-shim + cp $(BIN_DIR)/vz-shim lib/hypervisor/vz/vz-shim/vz-shim + @echo "Build complete: $(BIN_DIR)/vz-shim" + +# Sign vz-shim with entitlements +.PHONY: sign-vz-shim +sign-vz-shim: build-vz-shim + @echo "Signing $(BIN_DIR)/vz-shim with entitlements..." + codesign --sign - --entitlements $(ENTITLEMENTS_FILE) --force $(BIN_DIR)/vz-shim + @echo "Signed: $(BIN_DIR)/vz-shim" + +# Build for macOS with vz support +# Note: This builds without embedded CH/Caddy binaries since vz doesn't need them +# Guest-agent and init are cross-compiled for Linux (they run inside the VM) +.PHONY: build-darwin +build-darwin: build-embedded build-vz-shim | $(BIN_DIR) + @echo "Building hypeman for macOS with vz support..." + go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api + @echo "Build complete: $(BIN_DIR)/hypeman" + +# Sign the binary with entitlements (required for Virtualization.framework) +# Usage: make sign-darwin +.PHONY: sign-darwin +sign-darwin: build-darwin sign-vz-shim + @echo "Signing $(BIN_DIR)/hypeman with entitlements..." + codesign --sign - --entitlements $(ENTITLEMENTS_FILE) --force $(BIN_DIR)/hypeman + @echo "Verifying signature..." + codesign --display --entitlements - $(BIN_DIR)/hypeman + +# Sign with a specific identity (for distribution) +# Usage: make sign-darwin-identity IDENTITY="Developer ID Application: Your Name" +.PHONY: sign-darwin-identity +sign-darwin-identity: build-darwin + @if [ -z "$(IDENTITY)" ]; then \ + echo "Error: IDENTITY not set. Usage: make sign-darwin-identity IDENTITY='Developer ID Application: ...'"; \ + exit 1; \ + fi + @echo "Signing $(BIN_DIR)/hypeman with identity: $(IDENTITY)" + codesign --sign "$(IDENTITY)" --entitlements $(ENTITLEMENTS_FILE) --force --options runtime $(BIN_DIR)/hypeman + @echo "Verifying signature..." 
+ codesign --verify --verbose $(BIN_DIR)/hypeman + +# Run on macOS with vz support (development mode) +# Automatically signs the binary before running +.PHONY: dev-darwin +# macOS development mode with hot reload (uses vz, no sudo needed) +dev-darwin: build-embedded $(AIR) + @rm -f ./tmp/main + PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" $(AIR) -c .air.darwin.toml + +# Run without hot reload (for testing) +run: + @if [ "$$(uname)" = "Darwin" ]; then \ + $(MAKE) run-darwin; \ + else \ + $(MAKE) run-linux; \ + fi + +run-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded build + ./bin/hypeman + +run-darwin: sign-darwin + PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" ./bin/hypeman + +# Quick test of vz package compilation +.PHONY: test-vz-compile +test-vz-compile: + @echo "Testing vz package compilation..." + go build ./lib/hypervisor/vz/... + @echo "vz package compiles successfully" + +# Verify entitlements on a signed binary +.PHONY: verify-entitlements +verify-entitlements: + @if [ ! -f $(BIN_DIR)/hypeman ]; then \ + echo "Error: $(BIN_DIR)/hypeman not found. Run 'make sign-darwin' first."; \ + exit 1; \ + fi + @echo "Entitlements on $(BIN_DIR)/hypeman:" + codesign --display --entitlements - $(BIN_DIR)/hypeman diff --git a/README.md b/README.md index 69a5f18f..324102fe 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,13 @@ ## Requirements -Hypeman server runs on **Linux** with **KVM** virtualization support. The CLI can run locally on the server or connect remotely from any machine. +### Linux (Production) +Hypeman server runs on **Linux** with **KVM** virtualization support. Supports Cloud Hypervisor and QEMU as hypervisors. + +### macOS (Experimental) +Hypeman also supports **macOS** (11.0+) using Apple's **Virtualization.framework** via the `vz` hypervisor. See [macOS Support](#macos-support) below. + +The CLI can run locally on the server or connect remotely from any machine. ## Quick Start @@ -153,6 +159,59 @@ hypeman logs --source hypeman my-app For all available commands, run `hypeman --help`. +## macOS Support + +Hypeman supports macOS using Apple's Virtualization.framework through the `vz` hypervisor. This provides native virtualization on Apple Silicon Macs (Intel Macs are not supported). + +### Requirements + +- macOS 11.0+ (macOS 14.0+ required for snapshot/restore on ARM64) +- Apple Silicon (M1/M2/M3) recommended +- Caddy: `brew install caddy` +- e2fsprogs: `brew install e2fsprogs` (for ext4 disk images) + +### Quick Start (macOS) + +```bash +# Install dependencies +brew install caddy e2fsprogs + +# Add e2fsprogs to PATH (it's keg-only) +export PATH="/opt/homebrew/opt/e2fsprogs/bin:/opt/homebrew/opt/e2fsprogs/sbin:$PATH" + +# Configure environment +cp .env.darwin.example .env + +# Create data directory +mkdir -p ~/Library/Application\ Support/hypeman + +# Run with hot reload (auto-detects macOS, builds, signs, and runs) +make dev +``` + +The `make dev` command automatically detects macOS and handles building with vz support and signing with required entitlements. 
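+
+The signing step is what Virtualization.framework actually requires; it is
+roughly what `make sign-darwin` runs under the hood:
+
+```bash
+codesign --sign - --entitlements vz.entitlements --force ./bin/hypeman
+codesign --display --entitlements - ./bin/hypeman   # verify entitlements
+```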
+ +### Key Differences from Linux + +| Feature | Linux | macOS | +|---------|-------|-------| +| Hypervisors | Cloud Hypervisor, QEMU | vz (Virtualization.framework) | +| Networking | TAP devices, bridges, iptables | NAT (built-in, automatic) | +| Rate Limiting | HTB/tc | Not supported | +| GPU Passthrough | VFIO | Not supported | +| Disk Format | qcow2, raw | raw only | +| Snapshots | Always available | macOS 14+ ARM64 only | + +### Limitations + +- **Networking**: macOS uses NAT networking automatically. No manual bridge/TAP configuration needed, but ingress requires discovering the VM's NAT IP. +- **Rate Limiting**: Network and disk I/O rate limiting is not available on macOS. +- **GPU**: PCI device passthrough is not supported on macOS. +- **Disk Images**: qcow2 format is not directly supported; use raw disk images. +- **Snapshots**: Requires macOS 14.0+ on Apple Silicon (ARM64). + +For detailed development setup, see [DEVELOPMENT.md](DEVELOPMENT.md). + ## Development See [DEVELOPMENT.md](DEVELOPMENT.md) for build instructions, configuration options, and contributing guidelines. diff --git a/cmd/api/api/cp.go b/cmd/api/api/cp.go index 3b060d39..6bae53ed 100644 --- a/cmd/api/api/cp.go +++ b/cmd/api/api/cp.go @@ -11,7 +11,6 @@ import ( "github.com/gorilla/websocket" "github.com/kernel/hypeman/lib/guest" - "github.com/kernel/hypeman/lib/hypervisor" "github.com/kernel/hypeman/lib/instances" "github.com/kernel/hypeman/lib/logger" mw "github.com/kernel/hypeman/lib/middleware" @@ -219,10 +218,9 @@ func (s *ApiService) CpHandler(w http.ResponseWriter, r *http.Request) { // handleCopyTo handles copying files from client to guest // Returns the number of bytes transferred and any error. func (s *ApiService) handleCopyTo(ctx context.Context, ws *websocket.Conn, inst *instances.Instance, req CpRequest) (int64, error) { - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - return 0, fmt.Errorf("create vsock dialer: %w", err) + return 0, fmt.Errorf("get vsock dialer: %w", err) } grpcConn, err := guest.GetOrCreateConn(ctx, dialer) @@ -329,10 +327,9 @@ func (s *ApiService) handleCopyTo(ctx context.Context, ws *websocket.Conn, inst // handleCopyFrom handles copying files from guest to client // Returns the number of bytes transferred and any error. 
func (s *ApiService) handleCopyFrom(ctx context.Context, ws *websocket.Conn, inst *instances.Instance, req CpRequest) (int64, error) { - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - return 0, fmt.Errorf("create vsock dialer: %w", err) + return 0, fmt.Errorf("get vsock dialer: %w", err) } grpcConn, err := guest.GetOrCreateConn(ctx, dialer) diff --git a/cmd/api/api/cp_test.go b/cmd/api/api/cp_test.go index 62e62ee2..22e6f515 100644 --- a/cmd/api/api/cp_test.go +++ b/cmd/api/api/cp_test.go @@ -19,7 +19,7 @@ import ( func TestCpToAndFromInstance(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { @@ -158,7 +158,7 @@ func TestCpToAndFromInstance(t *testing.T) { func TestCpDirectoryToInstance(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go index b9f5f3b3..b1e13c2c 100644 --- a/cmd/api/api/exec.go +++ b/cmd/api/api/exec.go @@ -12,7 +12,6 @@ import ( "github.com/gorilla/websocket" "github.com/kernel/hypeman/lib/guest" - "github.com/kernel/hypeman/lib/hypervisor" "github.com/kernel/hypeman/lib/instances" "github.com/kernel/hypeman/lib/logger" mw "github.com/kernel/hypeman/lib/middleware" @@ -132,10 +131,9 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { // Create WebSocket read/writer wrapper that handles resize messages wsConn := &wsReadWriter{ws: ws, ctx: ctx, resizeChan: resizeChan} - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(hypervisor.Type(inst.HypervisorType), inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - log.ErrorContext(ctx, "failed to create vsock dialer", "error", err) + log.ErrorContext(ctx, "failed to get vsock dialer", "error", err) ws.WriteMessage(websocket.BinaryMessage, []byte(fmt.Sprintf("Error: %v\r\n", err))) ws.WriteMessage(websocket.TextMessage, []byte(`{"exitCode":127}`)) return diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index 1edf5eba..04706b34 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -20,7 +20,7 @@ import ( func TestExecInstanceNonTTY(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { @@ -160,7 +160,7 @@ func TestExecInstanceNonTTY(t *testing.T) { func TestExecWithDebianMinimal(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { 
diff --git a/cmd/api/api/images_test.go b/cmd/api/api/images_test.go index 86d1ff9e..5026646a 100644 --- a/cmd/api/api/images_test.go +++ b/cmd/api/api/images_test.go @@ -225,12 +225,13 @@ func TestCreateImage_Idempotent(t *testing.T) { t.Fatal("Build failed - this is the root cause of test failures") } - // Status can be "pending" (still processing) or "ready" (already completed in fast CI) + // Status can be "pending" (still queued), "pulling" (pull started), or "ready" (completed) // The key idempotency invariant is that the digest is the same (verified above) require.Contains(t, []oapi.ImageStatus{ oapi.ImageStatus(images.StatusPending), + oapi.ImageStatus(images.StatusPulling), oapi.ImageStatus(images.StatusReady), - }, img2.Status, "status should be pending or ready") + }, img2.Status, "status should be pending, pulling, or ready") // If still pending, should have queue position if img2.Status == oapi.ImageStatus(images.StatusPending) { diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 32826665..ca7c2fbc 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -621,13 +621,12 @@ func (s *ApiService) StatInstancePath(ctx context.Context, request oapi.StatInst }, nil } - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - log.ErrorContext(ctx, "failed to create vsock dialer", "error", err) + log.ErrorContext(ctx, "failed to get vsock dialer", "error", err) return oapi.StatInstancePath500JSONResponse{ Code: "internal_error", - Message: "failed to create vsock dialer", + Message: "failed to get vsock dialer", }, nil } diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 005241a4..81af2bd5 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -35,7 +35,7 @@ func TestGetInstance_NotFound(t *testing.T) { func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } svc := newTestService(t) diff --git a/cmd/gen-jwt/main.go b/cmd/gen-jwt/main.go index a14cd409..7fa2c03f 100644 --- a/cmd/gen-jwt/main.go +++ b/cmd/gen-jwt/main.go @@ -16,6 +16,7 @@ func main() { os.Exit(1) } userID := flag.String("user-id", "test-user", "User ID to include in the JWT token") + registryPush := flag.String("registry-push", "", "Repository to grant push access to (e.g., hypeman/builder)") flag.Parse() claims := jwt.MapClaims{ @@ -23,6 +24,14 @@ func main() { "iat": time.Now().Unix(), "exp": time.Now().Add(24 * time.Hour).Unix(), } + + // Add registry push permissions if requested + if *registryPush != "" { + claims["repo_access"] = []map[string]string{ + {"repo": *registryPush, "scope": "push"}, + } + } + token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) tokenString, err := token.SignedString([]byte(jwtSecret)) if err != nil { diff --git a/cmd/vz-shim/main.go b/cmd/vz-shim/main.go new file mode 100644 index 00000000..f7ce1907 --- /dev/null +++ b/cmd/vz-shim/main.go @@ -0,0 +1,163 @@ +//go:build darwin + +// Package main implements hypeman-vz-shim, a subprocess that hosts vz VMs. +// This allows VMs to survive hypeman restarts by running in a separate process. 
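+//
+// Process model: hypeman launches one vz-shim per VM, passing the VM spec as
+// JSON via -config. The shim exposes a cloud-hypervisor-compatible control
+// API on one Unix socket and proxies guest vsock connections on a second.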
+package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "log/slog" + "net" + "net/http" + "os" + "os/signal" + "path/filepath" + "syscall" + "time" + + "github.com/Code-Hex/vz/v3" + "github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig" +) + +func main() { + configJSON := flag.String("config", "", "VM configuration as JSON") + flag.Parse() + + if *configJSON == "" { + fmt.Fprintln(os.Stderr, "error: -config is required") + os.Exit(1) + } + + var config shimconfig.ShimConfig + if err := json.Unmarshal([]byte(*configJSON), &config); err != nil { + fmt.Fprintf(os.Stderr, "error: invalid config JSON: %v\n", err) + os.Exit(1) + } + + // Setup logging to file + if err := setupLogging(config.LogPath); err != nil { + fmt.Fprintf(os.Stderr, "error: setup logging: %v\n", err) + os.Exit(1) + } + + slog.Info("vz-shim starting", "control_socket", config.ControlSocket, "vsock_socket", config.VsockSocket) + + // Create the VM + vm, vmConfig, err := createVM(config) + if err != nil { + slog.Error("failed to create VM", "error", err) + os.Exit(1) + } + + if err := vm.Start(); err != nil { + slog.Error("failed to start VM", "error", err) + os.Exit(1) + } + slog.Info("VM started", "vcpus", config.VCPUs, "memory_mb", config.MemoryBytes/1024/1024) + + // Create the shim server + server := NewShimServer(vm, vmConfig) + + // Start control socket listener (remove stale socket from previous run) + os.Remove(config.ControlSocket) + controlListener, err := net.Listen("unix", config.ControlSocket) + if err != nil { + slog.Error("failed to listen on control socket", "error", err, "path", config.ControlSocket) + os.Exit(1) + } + defer controlListener.Close() + + // Start vsock proxy listener (remove stale socket from previous run) + os.Remove(config.VsockSocket) + vsockListener, err := net.Listen("unix", config.VsockSocket) + if err != nil { + slog.Error("failed to listen on vsock socket", "error", err, "path", config.VsockSocket) + os.Exit(1) + } + defer vsockListener.Close() + + // Start HTTP server for control API + httpServer := &http.Server{Handler: server.Handler()} + go func() { + slog.Info("control API listening", "socket", config.ControlSocket) + if err := httpServer.Serve(controlListener); err != nil && err != http.ErrServerClosed { + slog.Error("control API server error", "error", err) + } + }() + + // Start vsock proxy + go func() { + slog.Info("vsock proxy listening", "socket", config.VsockSocket) + server.ServeVsock(vsockListener) + }() + + // Wait for shutdown signal or VM stop + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) + + // Monitor VM state + stateChanged := vm.StateChangedNotify() + go func() { + for { + select { + case <-ctx.Done(): + return + case newState := <-stateChanged: + slog.Info("VM state changed", "state", newState) + if newState == vz.VirtualMachineStateStopped || newState == vz.VirtualMachineStateError { + slog.Info("VM stopped, shutting down shim") + cancel() + return + } + } + } + }() + + select { + case sig := <-sigChan: + slog.Info("received signal, shutting down", "signal", sig) + case <-ctx.Done(): + slog.Info("context cancelled, shutting down") + } + + // Graceful shutdown + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer shutdownCancel() + + httpServer.Shutdown(shutdownCtx) + + if vm.State() == vz.VirtualMachineStateRunning { + slog.Info("stopping VM") + if vm.CanStop() { + vm.Stop() + 
} + } + + slog.Info("vz-shim shutdown complete") +} + +func setupLogging(logPath string) error { + if logPath == "" { + // Log to stderr if no path specified + slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))) + return nil + } + + if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil { + return fmt.Errorf("create log directory: %w", err) + } + + file, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return fmt.Errorf("open log file: %w", err) + } + + slog.SetDefault(slog.New(slog.NewJSONHandler(file, &slog.HandlerOptions{Level: slog.LevelDebug}))) + return nil +} diff --git a/cmd/vz-shim/server.go b/cmd/vz-shim/server.go new file mode 100644 index 00000000..029827e7 --- /dev/null +++ b/cmd/vz-shim/server.go @@ -0,0 +1,260 @@ +//go:build darwin + +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "log/slog" + "net" + "net/http" + "sync" + + "github.com/Code-Hex/vz/v3" +) + +// ShimServer handles control API and vsock proxy for a vz VM. +type ShimServer struct { + vm *vz.VirtualMachine + vmConfig *vz.VirtualMachineConfiguration + mu sync.RWMutex +} + +// NewShimServer creates a new shim server. +func NewShimServer(vm *vz.VirtualMachine, vmConfig *vz.VirtualMachineConfiguration) *ShimServer { + return &ShimServer{ + vm: vm, + vmConfig: vmConfig, + } +} + +// VMInfoResponse matches the cloud-hypervisor VmInfo structure. +type VMInfoResponse struct { + State string `json:"state"` +} + +// Handler returns the HTTP handler for the control API. +func (s *ShimServer) Handler() http.Handler { + mux := http.NewServeMux() + + // Match cloud-hypervisor API patterns + mux.HandleFunc("GET /api/v1/vm.info", s.handleVMInfo) + mux.HandleFunc("PUT /api/v1/vm.pause", s.handlePause) + mux.HandleFunc("PUT /api/v1/vm.resume", s.handleResume) + mux.HandleFunc("PUT /api/v1/vm.shutdown", s.handleShutdown) + mux.HandleFunc("PUT /api/v1/vm.power-button", s.handlePowerButton) + mux.HandleFunc("GET /api/v1/vmm.ping", s.handlePing) + mux.HandleFunc("PUT /api/v1/vmm.shutdown", s.handleVMMShutdown) + + return mux +} + +func (s *ShimServer) handleVMInfo(w http.ResponseWriter, r *http.Request) { + s.mu.RLock() + defer s.mu.RUnlock() + + state := vzStateToString(s.vm.State()) + resp := VMInfoResponse{State: state} + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +func (s *ShimServer) handlePause(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.vm.CanPause() { + http.Error(w, "cannot pause VM", http.StatusBadRequest) + return + } + + if err := s.vm.Pause(); err != nil { + slog.Error("failed to pause VM", "error", err) + http.Error(w, fmt.Sprintf("pause failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("VM paused") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handleResume(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.vm.CanResume() { + http.Error(w, "cannot resume VM", http.StatusBadRequest) + return + } + + if err := s.vm.Resume(); err != nil { + slog.Error("failed to resume VM", "error", err) + http.Error(w, fmt.Sprintf("resume failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("VM resumed") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handleShutdown(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + // Request graceful shutdown via guest + success, err 
:= s.vm.RequestStop() + if err != nil || !success { + slog.Warn("RequestStop failed, trying Stop", "error", err) + if s.vm.CanStop() { + if err := s.vm.Stop(); err != nil { + slog.Error("failed to stop VM", "error", err) + http.Error(w, fmt.Sprintf("shutdown failed: %v", err), http.StatusInternalServerError) + return + } + } + } + + slog.Info("VM shutdown requested") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handlePowerButton(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + // RequestStop sends an ACPI power button event + success, err := s.vm.RequestStop() + if err != nil || !success { + slog.Error("failed to send power button", "error", err, "success", success) + http.Error(w, fmt.Sprintf("power button failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("power button sent") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handlePing(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("OK")) +} + +func (s *ShimServer) handleVMMShutdown(w http.ResponseWriter, r *http.Request) { + slog.Info("VMM shutdown requested") + w.WriteHeader(http.StatusNoContent) + + // Stop the VM and exit + go func() { + s.mu.Lock() + defer s.mu.Unlock() + + if s.vm.CanStop() { + s.vm.Stop() + } + // Process will exit when VM stops (monitored in main) + }() +} + +func vzStateToString(state vz.VirtualMachineState) string { + switch state { + case vz.VirtualMachineStateStopped: + return "Shutdown" + case vz.VirtualMachineStateRunning: + return "Running" + case vz.VirtualMachineStatePaused: + return "Paused" + case vz.VirtualMachineStateError: + return "Error" + case vz.VirtualMachineStateStarting: + return "Starting" + case vz.VirtualMachineStatePausing: + return "Pausing" + case vz.VirtualMachineStateResuming: + return "Resuming" + case vz.VirtualMachineStateStopping: + return "Stopping" + default: + return "Unknown" + } +} + +// ServeVsock handles vsock proxy connections using the Cloud Hypervisor protocol. +// Protocol: Client sends "CONNECT {port}\n", server responds "OK {port}\n", then proxies. 
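+// Example exchange (guest port 1024 is illustrative, not a fixed value):
+//
+//	-> CONNECT 1024\n
+//	<- OK 1024\n
+//	(raw bidirectional byte stream follows)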
+func (s *ShimServer) ServeVsock(listener net.Listener) {
+	for {
+		conn, err := listener.Accept()
+		if err != nil {
+			slog.Debug("vsock listener closed", "error", err)
+			return
+		}
+		go s.handleVsockConnection(conn)
+	}
+}
+
+func (s *ShimServer) handleVsockConnection(conn net.Conn) {
+	defer conn.Close()
+
+	// Read the CONNECT command
+	reader := bufio.NewReader(conn)
+	cmd, err := reader.ReadString('\n')
+	if err != nil {
+		slog.Error("failed to read vsock handshake", "error", err)
+		return
+	}
+
+	// Parse "CONNECT {port}\n"
+	var port uint32
+	if _, err := fmt.Sscanf(cmd, "CONNECT %d\n", &port); err != nil {
+		slog.Error("invalid vsock handshake", "cmd", cmd, "error", err)
+		conn.Write([]byte(fmt.Sprintf("ERR invalid command: %s", cmd)))
+		return
+	}
+
+	slog.Debug("vsock connect request", "port", port)
+
+	// Get vsock device and connect to guest
+	s.mu.RLock()
+	socketDevices := s.vm.SocketDevices()
+	s.mu.RUnlock()
+
+	if len(socketDevices) == 0 {
+		slog.Error("no vsock device configured")
+		conn.Write([]byte("ERR no vsock device\n"))
+		return
+	}
+
+	guestConn, err := socketDevices[0].Connect(port)
+	if err != nil {
+		slog.Error("failed to connect to guest vsock", "port", port, "error", err)
+		conn.Write([]byte(fmt.Sprintf("ERR connect failed: %v\n", err)))
+		return
+	}
+	defer guestConn.Close()
+
+	// Send OK response (matching CH protocol)
+	if _, err := conn.Write([]byte(fmt.Sprintf("OK %d\n", port))); err != nil {
+		slog.Error("failed to send OK response", "error", err)
+		return
+	}
+
+	slog.Debug("vsock connection established", "port", port)
+
+	// Proxy data bidirectionally
+	done := make(chan struct{}, 2)
+
+	go func() {
+		// Copy from the buffered reader rather than the raw conn so any bytes
+		// buffered while reading the handshake line are not lost.
+		io.Copy(guestConn, reader)
+		done <- struct{}{}
+	}()
+
+	go func() {
+		io.Copy(conn, guestConn)
+		done <- struct{}{}
+	}()
+
+	// Wait for one direction to close
+	<-done
+}
diff --git a/cmd/vz-shim/vm.go b/cmd/vz-shim/vm.go
new file mode 100644
index 00000000..2c6806ac
--- /dev/null
+++ b/cmd/vz-shim/vm.go
@@ -0,0 +1,276 @@
+//go:build darwin
+
+package main
+
+import (
+	"fmt"
+	"log/slog"
+	"net"
+	"os"
+	"runtime"
+	"strings"
+
+	"github.com/Code-Hex/vz/v3"
+	"github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig"
+)
+
+// createVM creates and configures a vz.VirtualMachine from ShimConfig.
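+// Besides the Linux boot loader it attaches a virtio console for the serial
+// log (kernel args are rewritten from ttyS0 to hvc0), NAT network devices, an
+// entropy device, virtio-blk disks (raw images only), one vsock device, and a
+// memory balloon.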
+func createVM(config shimconfig.ShimConfig) (*vz.VirtualMachine, *vz.VirtualMachineConfiguration, error) { + // Prepare kernel command line (vz uses hvc0 for serial console) + kernelArgs := config.KernelArgs + if kernelArgs == "" { + kernelArgs = "console=hvc0 root=/dev/vda" + } else { + kernelArgs = strings.ReplaceAll(kernelArgs, "console=ttyS0", "console=hvc0") + } + + bootLoader, err := vz.NewLinuxBootLoader( + config.KernelPath, + vz.WithCommandLine(kernelArgs), + vz.WithInitrd(config.InitrdPath), + ) + if err != nil { + return nil, nil, fmt.Errorf("create boot loader: %w", err) + } + + vcpus := computeCPUCount(config.VCPUs) + memoryBytes := computeMemorySize(uint64(config.MemoryBytes)) + + slog.Debug("VM config", "vcpus", vcpus, "memory_bytes", memoryBytes, "kernel", config.KernelPath, "initrd", config.InitrdPath) + + vmConfig, err := vz.NewVirtualMachineConfiguration(bootLoader, vcpus, memoryBytes) + if err != nil { + return nil, nil, fmt.Errorf("create vm configuration: %w", err) + } + + if err := configureSerialConsole(vmConfig, config.SerialLogPath); err != nil { + return nil, nil, fmt.Errorf("configure serial: %w", err) + } + + if err := configureNetwork(vmConfig, config.Networks); err != nil { + return nil, nil, fmt.Errorf("configure network: %w", err) + } + + entropyConfig, err := vz.NewVirtioEntropyDeviceConfiguration() + if err != nil { + return nil, nil, fmt.Errorf("create entropy device: %w", err) + } + vmConfig.SetEntropyDevicesVirtualMachineConfiguration([]*vz.VirtioEntropyDeviceConfiguration{entropyConfig}) + + if err := configureStorage(vmConfig, config.Disks); err != nil { + return nil, nil, fmt.Errorf("configure storage: %w", err) + } + + vsockConfig, err := vz.NewVirtioSocketDeviceConfiguration() + if err != nil { + return nil, nil, fmt.Errorf("create vsock device: %w", err) + } + vmConfig.SetSocketDevicesVirtualMachineConfiguration([]vz.SocketDeviceConfiguration{vsockConfig}) + + if balloonConfig, err := vz.NewVirtioTraditionalMemoryBalloonDeviceConfiguration(); err == nil { + vmConfig.SetMemoryBalloonDevicesVirtualMachineConfiguration([]vz.MemoryBalloonDeviceConfiguration{balloonConfig}) + } + + if validated, err := vmConfig.Validate(); !validated || err != nil { + return nil, nil, fmt.Errorf("invalid vm configuration: %w", err) + } + + vm, err := vz.NewVirtualMachine(vmConfig) + if err != nil { + return nil, nil, fmt.Errorf("create virtual machine: %w", err) + } + + return vm, vmConfig, nil +} + +func configureSerialConsole(vmConfig *vz.VirtualMachineConfiguration, logPath string) error { + var serialAttachment *vz.FileHandleSerialPortAttachment + + nullRead, err := os.OpenFile("/dev/null", os.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("open /dev/null for reading: %w", err) + } + + if logPath != "" { + file, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + nullRead.Close() + return fmt.Errorf("open serial log file: %w", err) + } + serialAttachment, err = vz.NewFileHandleSerialPortAttachment(nullRead, file) + if err != nil { + nullRead.Close() + file.Close() + return fmt.Errorf("create serial attachment: %w", err) + } + } else { + nullWrite, err := os.OpenFile("/dev/null", os.O_WRONLY, 0) + if err != nil { + nullRead.Close() + return fmt.Errorf("open /dev/null for writing: %w", err) + } + serialAttachment, err = vz.NewFileHandleSerialPortAttachment(nullRead, nullWrite) + if err != nil { + nullRead.Close() + nullWrite.Close() + return fmt.Errorf("create serial attachment: %w", err) + } + } + + consoleConfig, err := 
vz.NewVirtioConsoleDeviceSerialPortConfiguration(serialAttachment) + if err != nil { + return fmt.Errorf("create console config: %w", err) + } + vmConfig.SetSerialPortsVirtualMachineConfiguration([]*vz.VirtioConsoleDeviceSerialPortConfiguration{ + consoleConfig, + }) + + return nil +} + +func configureNetwork(vmConfig *vz.VirtualMachineConfiguration, networks []shimconfig.NetworkConfig) error { + var devices []*vz.VirtioNetworkDeviceConfiguration + if len(networks) == 0 { + dev, err := createNATNetworkDevice("") + if err != nil { + return err + } + devices = append(devices, dev) + } else { + for _, netConfig := range networks { + dev, err := createNATNetworkDevice(netConfig.MAC) + if err != nil { + return err + } + devices = append(devices, dev) + } + } + vmConfig.SetNetworkDevicesVirtualMachineConfiguration(devices) + return nil +} + +func createNATNetworkDevice(macAddr string) (*vz.VirtioNetworkDeviceConfiguration, error) { + natAttachment, err := vz.NewNATNetworkDeviceAttachment() + if err != nil { + return nil, fmt.Errorf("create NAT attachment: %w", err) + } + + networkConfig, err := vz.NewVirtioNetworkDeviceConfiguration(natAttachment) + if err != nil { + return nil, fmt.Errorf("create network config: %w", err) + } + + mac, err := assignMACAddress(macAddr) + if err != nil { + return nil, err + } + networkConfig.SetMACAddress(mac) + + return networkConfig, nil +} + +func assignMACAddress(macAddr string) (*vz.MACAddress, error) { + if macAddr == "" { + mac, err := vz.NewRandomLocallyAdministeredMACAddress() + if err != nil { + return nil, fmt.Errorf("generate MAC address: %w", err) + } + slog.Info("generated random MAC address", "mac", mac.String()) + return mac, nil + } + + hwAddr, err := net.ParseMAC(macAddr) + if err != nil { + slog.Warn("failed to parse MAC address, generating random", "mac", macAddr, "error", err) + mac, err := vz.NewRandomLocallyAdministeredMACAddress() + if err != nil { + return nil, fmt.Errorf("generate MAC address: %w", err) + } + return mac, nil + } + + mac, err := vz.NewMACAddress(hwAddr) + if err != nil { + slog.Warn("failed to create MAC from parsed address, generating random", "mac", macAddr, "error", err) + mac, err := vz.NewRandomLocallyAdministeredMACAddress() + if err != nil { + return nil, fmt.Errorf("generate MAC address: %w", err) + } + return mac, nil + } + + slog.Info("using specified MAC address", "mac", macAddr) + return mac, nil +} + +func configureStorage(vmConfig *vz.VirtualMachineConfiguration, disks []shimconfig.DiskConfig) error { + var storageDevices []vz.StorageDeviceConfiguration + + for _, disk := range disks { + if _, err := os.Stat(disk.Path); os.IsNotExist(err) { + return fmt.Errorf("disk image not found: %s", disk.Path) + } + + if strings.HasSuffix(disk.Path, ".qcow2") { + return fmt.Errorf("qcow2 not supported by vz, use raw format: %s", disk.Path) + } + + attachment, err := vz.NewDiskImageStorageDeviceAttachment(disk.Path, disk.Readonly) + if err != nil { + return fmt.Errorf("create disk attachment for %s: %w", disk.Path, err) + } + + blockConfig, err := vz.NewVirtioBlockDeviceConfiguration(attachment) + if err != nil { + return fmt.Errorf("create block device config: %w", err) + } + + storageDevices = append(storageDevices, blockConfig) + } + + if len(storageDevices) > 0 { + vmConfig.SetStorageDevicesVirtualMachineConfiguration(storageDevices) + } + + return nil +} + +func computeCPUCount(requested int) uint { + virtualCPUCount := uint(requested) + if virtualCPUCount == 0 { + virtualCPUCount = uint(runtime.NumCPU() - 1) + if 
virtualCPUCount < 1 { + virtualCPUCount = 1 + } + } + + maxAllowed := vz.VirtualMachineConfigurationMaximumAllowedCPUCount() + minAllowed := vz.VirtualMachineConfigurationMinimumAllowedCPUCount() + + if virtualCPUCount > maxAllowed { + virtualCPUCount = maxAllowed + } + if virtualCPUCount < minAllowed { + virtualCPUCount = minAllowed + } + + return virtualCPUCount +} + +func computeMemorySize(requested uint64) uint64 { + if requested == 0 { + requested = 2 * 1024 * 1024 * 1024 // 2GB default + } + + maxAllowed := vz.VirtualMachineConfigurationMaximumAllowedMemorySize() + minAllowed := vz.VirtualMachineConfigurationMinimumAllowedMemorySize() + + if requested > maxAllowed { + requested = maxAllowed + } + if requested < minAllowed { + requested = minAllowed + } + + return requested +} diff --git a/go.mod b/go.mod index d6691b8a..f6ce5e86 100644 --- a/go.mod +++ b/go.mod @@ -56,6 +56,8 @@ require ( require ( github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Code-Hex/go-infinity-channel v1.0.0 // indirect + github.com/Code-Hex/vz/v3 v3.7.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apex/log v1.9.0 // indirect diff --git a/go.sum b/go.sum index a369c838..d33f256d 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,10 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8af github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Code-Hex/go-infinity-channel v1.0.0 h1:M8BWlfDOxq9or9yvF9+YkceoTkDI1pFAqvnP87Zh0Nw= +github.com/Code-Hex/go-infinity-channel v1.0.0/go.mod h1:5yUVg/Fqao9dAjcpzoQ33WwfdMWmISOrQloDRn3bsvY= +github.com/Code-Hex/vz/v3 v3.7.1 h1:EN1yNiyrbPq+dl388nne2NySo8I94EnPppvqypA65XM= +github.com/Code-Hex/vz/v3 v3.7.1/go.mod h1:1LsW0jqW0r0cQ+IeR4hHbjdqOtSidNCVMWhStMHGho8= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= diff --git a/lib/hypervisor/README.md b/lib/hypervisor/README.md index 2bab53d9..3eafd673 100644 --- a/lib/hypervisor/README.md +++ b/lib/hypervisor/README.md @@ -4,20 +4,29 @@ Provides a common interface for VM management across different hypervisors. ## Purpose -Hypeman originally supported only Cloud Hypervisor. This abstraction layer allows supporting multiple hypervisors (e.g., QEMU) through a unified interface, enabling: +Hypeman originally supported only Cloud Hypervisor. 
This abstraction layer allows supporting multiple hypervisors through a unified interface, enabling:
 
 - **Hypervisor choice per instance** - Different instances can use different hypervisors
+- **Platform support** - Linux uses Cloud Hypervisor/QEMU, macOS uses Virtualization.framework
 - **Feature parity where possible** - Common operations work the same way
 - **Graceful degradation** - Features unsupported by a hypervisor can be detected and handled
 
+## Implementations
+
+| Hypervisor | Platform | Process Model | Control Interface |
+|------------|----------|---------------|-------------------|
+| Cloud Hypervisor | Linux | External process | HTTP API over Unix socket |
+| QEMU | Linux | External process | QMP over Unix socket |
+| vz | macOS | vz-shim subprocess | HTTP API over Unix socket |
+
 ## How It Works
 
 The abstraction defines two key interfaces:
 
 1. **Hypervisor** - VM lifecycle operations (create, boot, pause, resume, snapshot, restore, shutdown)
-2. **ProcessManager** - Hypervisor process lifecycle (start binary, get binary path)
+2. **VMStarter** - VM startup and configuration (get binary path, start and restore VMs)
 
-Each hypervisor implementation translates the generic configuration and operations to its native format. For example, Cloud Hypervisor uses an HTTP API over a Unix socket, while QEMU would use QMP.
+Each implementation translates generic configuration to its native format. Cloud Hypervisor and QEMU run as external processes with socket-based control. The vz implementation drives Apple's Virtualization.framework from a codesigned vz-shim subprocess that exposes the same socket-based control pattern.
 
 Before using optional features, callers check capabilities:
 
@@ -27,6 +36,19 @@ if hv.Capabilities().SupportsSnapshot {
 }
 ```
 
+## Platform Differences
+
+### Linux (Cloud Hypervisor, QEMU)
+- VMs run as separate processes with PIDs
+- State persists across hypeman restarts (reconnect via socket)
+- TAP devices and Linux bridges for networking
+
+### macOS (vz)
+- VMs run in a codesigned vz-shim subprocess (one per VM)
+- VMs stop if hypeman stops (cannot reconnect)
+- NAT networking via Virtualization.framework
+- Requires code signing with virtualization entitlement
+
 ## Hypervisor Switching
 
 Instances store their hypervisor type in metadata. An instance can switch hypervisors only when stopped (no running VM, no snapshot), since:
diff --git a/lib/hypervisor/vz/client.go b/lib/hypervisor/vz/client.go
new file mode 100644
index 00000000..50abbbe3
--- /dev/null
+++ b/lib/hypervisor/vz/client.go
@@ -0,0 +1,172 @@
+//go:build darwin
+
+package vz
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"time"
+
+	"github.com/kernel/hypeman/lib/hypervisor"
+)
+
+// Client implements hypervisor.Hypervisor via HTTP to the vz-shim process.
+type Client struct {
+	socketPath string
+	httpClient *http.Client
+}
+
+// NewClient creates a new vz shim client.
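+// The "http://vz-shim" host used in requests is a placeholder: the custom
+// transport always dials the shim's Unix socket, so the name is never resolved.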
+func NewClient(socketPath string) (*Client, error) { + transport := &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + return net.Dial("unix", socketPath) + }, + } + httpClient := &http.Client{ + Transport: transport, + Timeout: 30 * time.Second, + } + + // Verify connectivity with a short timeout + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://vz-shim/api/v1/vmm.ping", nil) + if err != nil { + return nil, fmt.Errorf("ping shim: %w", err) + } + resp, err := httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("ping shim: %w", err) + } + resp.Body.Close() + + return &Client{ + socketPath: socketPath, + httpClient: httpClient, + }, nil +} + +var _ hypervisor.Hypervisor = (*Client)(nil) + +// vmInfoResponse matches the shim's VMInfoResponse structure. +type vmInfoResponse struct { + State string `json:"state"` +} + +func (c *Client) Capabilities() hypervisor.Capabilities { + return hypervisor.Capabilities{ + SupportsSnapshot: false, + SupportsHotplugMemory: false, + SupportsPause: true, + SupportsVsock: true, + SupportsGPUPassthrough: false, + SupportsDiskIOLimit: false, + } +} + +// doPut sends a PUT request to the shim and checks for success. +func (c *Client) doPut(ctx context.Context, path string, body io.Reader) error { + req, err := http.NewRequestWithContext(ctx, http.MethodPut, "http://vz-shim"+path, body) + if err != nil { + return err + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + resp, err := c.httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("%s failed with status %d: %s", path, resp.StatusCode, string(bodyBytes)) + } + return nil +} + +// doGet sends a GET request to the shim and returns the response body. 
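+// Unlike doPut, it does not check the HTTP status code; callers must treat
+// an undecodable body as an error.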
+func (c *Client) doGet(ctx context.Context, path string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://vz-shim"+path, nil) + if err != nil { + return nil, err + } + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return io.ReadAll(resp.Body) +} + +func (c *Client) DeleteVM(ctx context.Context) error { + return c.doPut(ctx, "/api/v1/vm.shutdown", nil) +} + +func (c *Client) Shutdown(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, http.MethodPut, "http://vz-shim/api/v1/vmm.shutdown", nil) + if err != nil { + return err + } + resp, err := c.httpClient.Do(req) + if err != nil { + // Connection reset is expected when shim exits + return nil + } + defer resp.Body.Close() + return nil +} + +func (c *Client) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) { + body, err := c.doGet(ctx, "/api/v1/vm.info") + if err != nil { + return nil, fmt.Errorf("get vm info: %w", err) + } + + var info vmInfoResponse + if err := json.Unmarshal(body, &info); err != nil { + return nil, fmt.Errorf("decode vm info: %w", err) + } + + var state hypervisor.VMState + switch info.State { + case "Running": + state = hypervisor.StateRunning + case "Paused": + state = hypervisor.StatePaused + case "Starting": + state = hypervisor.StateCreated + case "Shutdown", "Stopped", "Error": + state = hypervisor.StateShutdown + default: + state = hypervisor.StateShutdown + } + + return &hypervisor.VMInfo{State: state}, nil +} + +func (c *Client) Pause(ctx context.Context) error { + return c.doPut(ctx, "/api/v1/vm.pause", nil) +} + +func (c *Client) Resume(ctx context.Context) error { + return c.doPut(ctx, "/api/v1/vm.resume", nil) +} + +func (c *Client) Snapshot(ctx context.Context, destPath string) error { + return hypervisor.ErrNotSupported +} + +func (c *Client) ResizeMemory(ctx context.Context, bytes int64) error { + return hypervisor.ErrNotSupported +} + +func (c *Client) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error { + return hypervisor.ErrNotSupported +} diff --git a/lib/hypervisor/vz/shimconfig/config.go b/lib/hypervisor/vz/shimconfig/config.go new file mode 100644 index 00000000..23056267 --- /dev/null +++ b/lib/hypervisor/vz/shimconfig/config.go @@ -0,0 +1,44 @@ +//go:build darwin + +// Package shimconfig defines the configuration types shared between +// the hypeman API server and the vz-shim subprocess. +package shimconfig + +// ShimConfig is the configuration passed from hypeman to the shim. +type ShimConfig struct { + // Compute resources + VCPUs int `json:"vcpus"` + MemoryBytes int64 `json:"memory_bytes"` + + // Storage + Disks []DiskConfig `json:"disks"` + + // Network + Networks []NetworkConfig `json:"networks"` + + // Console + SerialLogPath string `json:"serial_log_path"` + + // Boot configuration + KernelPath string `json:"kernel_path"` + InitrdPath string `json:"initrd_path"` + KernelArgs string `json:"kernel_args"` + + // Socket paths (where shim should listen) + ControlSocket string `json:"control_socket"` + VsockSocket string `json:"vsock_socket"` + + // Logging + LogPath string `json:"log_path"` +} + +// DiskConfig represents a disk attached to the VM. +type DiskConfig struct { + Path string `json:"path"` + Readonly bool `json:"readonly"` +} + +// NetworkConfig represents a network interface. 
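+// Only the MAC address crosses the boundary: the shim always attaches a
+// Virtualization.framework NAT device, so there is no TAP or bridge field.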
+type NetworkConfig struct { + MAC string `json:"mac"` +} diff --git a/lib/hypervisor/vz/starter.go b/lib/hypervisor/vz/starter.go new file mode 100644 index 00000000..15ea9847 --- /dev/null +++ b/lib/hypervisor/vz/starter.go @@ -0,0 +1,202 @@ +//go:build darwin + +// Package vz implements the hypervisor.Hypervisor interface for +// Apple's Virtualization.framework on macOS via the vz-shim subprocess. +package vz + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig" + "github.com/kernel/hypeman/lib/logger" + "github.com/kernel/hypeman/lib/paths" +) + +func init() { + hypervisor.RegisterSocketName(hypervisor.TypeVZ, "vz.sock") + hypervisor.RegisterVsockDialerFactory(hypervisor.TypeVZ, NewVsockDialer) + hypervisor.RegisterClientFactory(hypervisor.TypeVZ, func(socketPath string) (hypervisor.Hypervisor, error) { + return NewClient(socketPath) + }) +} + +var ( + shimOnce sync.Once + shimPath string + shimErr error +) + +// extractShim extracts the embedded vz-shim binary to a temp file and codesigns it. +func extractShim() (string, error) { + shimOnce.Do(func() { + f, err := os.CreateTemp("", "vz-shim-*") + if err != nil { + shimErr = fmt.Errorf("create temp file: %w", err) + return + } + defer f.Close() + + if _, err := f.Write(vzShimBinary); err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("write vz-shim binary: %w", err) + return + } + + if err := f.Chmod(0755); err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("chmod vz-shim binary: %w", err) + return + } + + // Codesign with entitlements for Virtualization.framework + cmd := exec.Command("codesign", "--sign", "-", "--entitlements", entitlementsPath(), "--force", f.Name()) + if out, err := cmd.CombinedOutput(); err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("codesign vz-shim: %s: %w", string(out), err) + return + } + + shimPath = f.Name() + }) + return shimPath, shimErr +} + +// entitlementsPath returns the path to the vz.entitlements file. +func entitlementsPath() string { + exe, err := os.Executable() + if err != nil { + return "vz.entitlements" + } + return filepath.Join(filepath.Dir(exe), "vz.entitlements") +} + +// Starter implements hypervisor.VMStarter for Virtualization.framework. +type Starter struct{} + +// NewStarter creates a new vz starter. +func NewStarter() *Starter { + return &Starter{} +} + +var _ hypervisor.VMStarter = (*Starter)(nil) + +func (s *Starter) SocketName() string { + return "vz.sock" +} + +// GetBinaryPath extracts the embedded vz-shim and returns its path. +func (s *Starter) GetBinaryPath(p *paths.Paths, version string) (string, error) { + return extractShim() +} + +// GetVersion returns "vz-shim". +func (s *Starter) GetVersion(p *paths.Paths) (string, error) { + return "vz-shim", nil +} + +// StartVM spawns a vz-shim subprocess to host the VM. 
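+// It returns the shim's PID plus a connected control client once the shim's
+// Unix socket answers; the VM itself is created and booted inside the shim.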
+func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, socketPath string, config hypervisor.VMConfig) (int, hypervisor.Hypervisor, error) { + log := logger.FromContext(ctx) + + instanceDir := filepath.Dir(socketPath) + controlSocket := socketPath + vsockSocket := filepath.Join(instanceDir, "vz.vsock") + logPath := filepath.Join(instanceDir, "logs", "vz-shim.log") + + shimConfig := shimconfig.ShimConfig{ + VCPUs: config.VCPUs, + MemoryBytes: config.MemoryBytes, + SerialLogPath: config.SerialLogPath, + KernelPath: config.KernelPath, + InitrdPath: config.InitrdPath, + KernelArgs: config.KernelArgs, + ControlSocket: controlSocket, + VsockSocket: vsockSocket, + LogPath: logPath, + } + + for _, disk := range config.Disks { + shimConfig.Disks = append(shimConfig.Disks, shimconfig.DiskConfig{ + Path: disk.Path, + Readonly: disk.Readonly, + }) + } + + for _, net := range config.Networks { + shimConfig.Networks = append(shimConfig.Networks, shimconfig.NetworkConfig{ + MAC: net.MAC, + }) + } + + configJSON, err := json.Marshal(shimConfig) + if err != nil { + return 0, nil, fmt.Errorf("marshal shim config: %w", err) + } + + log.DebugContext(ctx, "spawning vz-shim", "config", string(configJSON)) + + shimBinary, err := s.GetBinaryPath(p, version) + if err != nil { + return 0, nil, fmt.Errorf("get vz-shim binary: %w", err) + } + + cmd := exec.Command(shimBinary, "-config", string(configJSON)) + cmd.Stdout = nil + cmd.Stderr = nil + cmd.Stdin = nil + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + if err := cmd.Start(); err != nil { + return 0, nil, fmt.Errorf("start vz-shim: %w", err) + } + + pid := cmd.Process.Pid + log.InfoContext(ctx, "vz-shim started", "pid", pid, "control_socket", controlSocket) + + client, err := s.waitForShim(ctx, controlSocket, 30*time.Second) + if err != nil { + cmd.Process.Kill() + return 0, nil, fmt.Errorf("connect to vz-shim: %w", err) + } + + cmd.Process.Release() + + return pid, client, nil +} + +// RestoreVM is not supported by vz (Virtualization.framework cannot restore Linux guests). +func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) { + return 0, nil, hypervisor.ErrNotSupported +} + +func (s *Starter) waitForShim(ctx context.Context, socketPath string, timeout time.Duration) (*Client, error) { + deadline := time.Now().Add(timeout) + + for time.Now().Before(deadline) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + client, err := NewClient(socketPath) + if err == nil { + return client, nil + } + + time.Sleep(100 * time.Millisecond) + } + + return nil, fmt.Errorf("timeout waiting for shim socket: %s", socketPath) +} diff --git a/lib/hypervisor/vz/vsock.go b/lib/hypervisor/vz/vsock.go new file mode 100644 index 00000000..2243ab7d --- /dev/null +++ b/lib/hypervisor/vz/vsock.go @@ -0,0 +1,111 @@ +//go:build darwin + +package vz + +import ( + "bufio" + "context" + "fmt" + "log/slog" + "net" + "strings" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" +) + +const ( + vsockDialTimeout = 5 * time.Second + vsockHandshakeTimeout = 5 * time.Second +) + +// VsockDialer implements hypervisor.VsockDialer for vz via the shim's Unix socket proxy. +// Uses the same protocol as Cloud Hypervisor: CONNECT {port}\n -> OK {port}\n +type VsockDialer struct { + socketPath string // path to vz.vsock Unix socket +} + +// NewVsockDialer creates a new VsockDialer for vz. 
+// vsockSocket is the path to the vz.vsock Unix socket proxy. +// vsockCID is unused because the vz proxy is per-VM (unlike QEMU which uses kernel AF_VSOCK with CID routing). +func NewVsockDialer(vsockSocket string, vsockCID int64) hypervisor.VsockDialer { + return &VsockDialer{ + socketPath: vsockSocket, + } +} + +// Key returns a unique identifier for this dialer, used for connection pooling. +func (d *VsockDialer) Key() string { + return "vz:" + d.socketPath +} + +// DialVsock connects to the guest on the specified port via the shim's vsock proxy. +func (d *VsockDialer) DialVsock(ctx context.Context, port int) (net.Conn, error) { + slog.DebugContext(ctx, "connecting to vsock via shim proxy", "socket", d.socketPath, "port", port) + + // Use dial timeout, respecting context deadline if shorter + dialTimeout := vsockDialTimeout + if deadline, ok := ctx.Deadline(); ok { + if remaining := time.Until(deadline); remaining < dialTimeout { + dialTimeout = remaining + } + } + + // Connect to the shim's vsock proxy Unix socket + dialer := net.Dialer{Timeout: dialTimeout} + conn, err := dialer.DialContext(ctx, "unix", d.socketPath) + if err != nil { + return nil, fmt.Errorf("dial vsock proxy socket %s: %w", d.socketPath, err) + } + + slog.DebugContext(ctx, "connected to vsock proxy, performing handshake", "port", port) + + // Set deadline for handshake + if err := conn.SetDeadline(time.Now().Add(vsockHandshakeTimeout)); err != nil { + conn.Close() + return nil, fmt.Errorf("set handshake deadline: %w", err) + } + + // Perform handshake (same protocol as Cloud Hypervisor) + handshakeCmd := fmt.Sprintf("CONNECT %d\n", port) + if _, err := conn.Write([]byte(handshakeCmd)); err != nil { + conn.Close() + return nil, fmt.Errorf("send vsock handshake: %w", err) + } + + // Read handshake response + reader := bufio.NewReader(conn) + response, err := reader.ReadString('\n') + if err != nil { + conn.Close() + return nil, fmt.Errorf("read vsock handshake response (is guest-agent running?): %w", err) + } + + // Clear deadline after successful handshake + if err := conn.SetDeadline(time.Time{}); err != nil { + conn.Close() + return nil, fmt.Errorf("clear deadline: %w", err) + } + + response = strings.TrimSpace(response) + if !strings.HasPrefix(response, "OK ") { + conn.Close() + return nil, fmt.Errorf("vsock handshake failed: %s", response) + } + + slog.DebugContext(ctx, "vsock handshake successful", "response", response) + + // Return wrapped connection that uses the bufio.Reader + return &bufferedConn{Conn: conn, reader: reader}, nil +} + +// bufferedConn wraps a net.Conn with a bufio.Reader to ensure any buffered +// data from the handshake is properly drained before reading from the connection. +type bufferedConn struct { + net.Conn + reader *bufio.Reader +} + +func (c *bufferedConn) Read(p []byte) (int, error) { + return c.reader.Read(p) +} diff --git a/lib/hypervisor/vz/vz-shim/.gitkeep b/lib/hypervisor/vz/vz-shim/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/lib/hypervisor/vz/vz_shim_binary.go b/lib/hypervisor/vz/vz_shim_binary.go new file mode 100644 index 00000000..fd278761 --- /dev/null +++ b/lib/hypervisor/vz/vz_shim_binary.go @@ -0,0 +1,11 @@ +//go:build darwin + +package vz + +import _ "embed" + +// vzShimBinary contains the embedded vz-shim binary. +// Built by the Makefile before the main binary is compiled. 
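+// At runtime it is written to a temp file and ad-hoc codesigned with the
+// virtualization entitlement before being spawned (see extractShim).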
+// +//go:embed vz-shim/vz-shim +var vzShimBinary []byte diff --git a/lib/instances/README.md b/lib/instances/README.md index a2d42172..51a245ef 100644 --- a/lib/instances/README.md +++ b/lib/instances/README.md @@ -1,12 +1,12 @@ # Instance Manager -Manages VM instance lifecycle using Cloud Hypervisor. +Manages VM instance lifecycle across multiple hypervisors (Cloud Hypervisor, QEMU on Linux; vz on macOS). ## Design Decisions ### Why State Machine? (state.go) -**What:** Single-hop state transitions matching Cloud Hypervisor's actual states +**What:** Single-hop state transitions matching hypervisor states **Why:** - Validates transitions before execution (prevents invalid operations) @@ -132,6 +132,6 @@ TestStorageOperations - metadata persistence, directory cleanup - `lib/images` - Image manager for OCI image validation - `lib/system` - System manager for kernel/initrd files -- `lib/vmm` - Cloud Hypervisor client for VM operations -- System tools: `mkfs.erofs`, `cpio`, `gzip` +- `lib/hypervisor` - Hypervisor abstraction for VM operations +- System tools: `mkfs.erofs`, `cpio`, `gzip` (Linux); `mkfs.ext4` (macOS) diff --git a/lib/instances/create.go b/lib/instances/create.go index 33724968..003a984c 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -113,6 +113,11 @@ func (m *manager) createInstance( vsockSocket := m.paths.InstanceVsockSocket(id) log.DebugContext(ctx, "generated vsock config", "instance_id", id, "cid", vsockCID) + // Override vsock socket path for vz (uses Virtio socket, not vhost-user) + if req.Hypervisor == hypervisor.TypeVZ || (req.Hypervisor == "" && m.defaultHypervisor == hypervisor.TypeVZ) { + vsockSocket = filepath.Join(m.paths.InstanceDir(id), "vz.vsock") + } + // 5. Check instance doesn't already exist if _, err := m.loadMetadata(id); err == nil { return nil, ErrAlreadyExists @@ -712,10 +717,19 @@ func (m *manager) buildHypervisorConfig(ctx context.Context, inst *Instance, ima PCIDevices: pciDevices, KernelPath: kernelPath, InitrdPath: initrdPath, - KernelArgs: "console=ttyS0", + KernelArgs: m.kernelArgs(inst.HypervisorType), }, nil } +// kernelArgs returns the kernel command line arguments for the given hypervisor type. +// vz uses hvc0 (virtio console), all others use ttyS0 (serial port). +func (m *manager) kernelArgs(hvType hypervisor.Type) string { + if hvType == hypervisor.TypeVZ { + return "console=hvc0" + } + return "console=ttyS0" +} + func ptr[T any](v T) *T { return &v } diff --git a/lib/network/README.md b/lib/network/README.md index 1e771532..c54e66a8 100644 --- a/lib/network/README.md +++ b/lib/network/README.md @@ -1,6 +1,21 @@ # Network Manager -Manages the default virtual network for instances using a Linux bridge and TAP devices. +Manages the default virtual network for instances. + +## Platform Support + +| Platform | Network Model | Implementation | +|----------|---------------|----------------| +| Linux | Bridge + TAP | Linux bridge with TAP devices per VM, iptables NAT | +| macOS | NAT | Virtualization.framework built-in NAT (192.168.64.0/24) | + +On macOS, the network manager skips bridge/TAP creation since vz provides NAT networking automatically. + +--- + +## Linux Networking + +On Linux, hypeman manages a virtual network using a Linux bridge and TAP devices. 
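+
+Both paths are selected at compile time with Go build tags. A minimal sketch of
+the macOS side (the `EnsureNetwork` name is illustrative, not this package's
+actual API):
+
+```go
+//go:build darwin
+
+package network
+
+// EnsureNetwork is effectively a no-op on macOS: the vz NAT attachment
+// hands each VM an address on 192.168.64.0/24, so there is no bridge,
+// TAP device, or iptables rule to create.
+func EnsureNetwork() error { return nil }
+```
+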
## How Linux VM Networking Works diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh new file mode 100755 index 00000000..41a506dd --- /dev/null +++ b/scripts/e2e-install-test.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# +# Hypeman E2E Install Test +# +# Runs a full install → verify → uninstall cycle. +# Platform-agnostic: works on both Linux and macOS. +# + +set -e + +# Colors +RED='\033[38;2;255;110;110m' +GREEN='\033[38;2;92;190;83m' +YELLOW='\033[0;33m' +NC='\033[0m' + +info() { echo -e "${GREEN}[INFO]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; } +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; exit 1; } + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +OS=$(uname -s | tr '[:upper:]' '[:lower:]') + +cd "$REPO_DIR" + +# ============================================================================= +# Phase 1: Clean slate +# ============================================================================= +info "Phase 1: Cleaning previous installation..." +bash scripts/uninstall.sh 2>/dev/null || true + +# ============================================================================= +# Phase 2: Install from source +# ============================================================================= +info "Phase 2: Installing from source..." +BRANCH=$(git rev-parse --abbrev-ref HEAD) +BRANCH="$BRANCH" bash scripts/install.sh + +# ============================================================================= +# Phase 3: Wait for service +# ============================================================================= +info "Phase 3: Waiting for service to be healthy..." + +PORT=8080 +TIMEOUT=60 +ELAPSED=0 + +while [ $ELAPSED -lt $TIMEOUT ]; do + if curl -sf "http://localhost:${PORT}/health" >/dev/null 2>&1; then + pass "Service is responding on port ${PORT}" + break + fi + sleep 2 + ELAPSED=$((ELAPSED + 2)) +done + +if [ $ELAPSED -ge $TIMEOUT ]; then + # Dump logs for debugging + if [ "$OS" = "darwin" ]; then + LOG_FILE="$HOME/Library/Application Support/hypeman/logs/hypeman.log" + if [ -f "$LOG_FILE" ]; then + warn "Service logs (last 50 lines):" + tail -50 "$LOG_FILE" || true + else + warn "No log file found at $LOG_FILE" + fi + warn "launchctl list:" + launchctl list | grep hypeman || true + fi + fail "Service did not become healthy within ${TIMEOUT}s" +fi + +# ============================================================================= +# Phase 4: Validate installation +# ============================================================================= +info "Phase 4: Validating installation..." 
+ +# Check binaries +if [ "$OS" = "darwin" ]; then + [ -x /usr/local/bin/hypeman-api ] || fail "hypeman-api binary not found" + [ -x /usr/local/bin/vz-shim ] || fail "vz-shim binary not found" + pass "Binaries installed correctly" + + # Check launchd service + if launchctl list | grep -q com.kernel.hypeman; then + pass "launchd service is loaded" + else + fail "launchd service not loaded" + fi +else + [ -x /opt/hypeman/bin/hypeman-api ] || fail "hypeman-api binary not found" + pass "Binaries installed correctly" + + # Check systemd service + if systemctl is-active --quiet hypeman; then + pass "systemd service is running" + else + fail "systemd service not running" + fi +fi + +# Check config +if [ "$OS" = "darwin" ]; then + [ -f "$HOME/.config/hypeman/config" ] || fail "Config file not found" +else + [ -f /etc/hypeman/config ] || fail "Config file not found" +fi +pass "Config file exists" + +# ============================================================================= +# Phase 5: Cleanup +# ============================================================================= +info "Phase 5: Cleaning up..." +KEEP_DATA=false bash scripts/uninstall.sh + +# ============================================================================= +# Phase 6: Verify cleanup +# ============================================================================= +info "Phase 6: Verifying cleanup..." + +if [ "$OS" = "darwin" ]; then + [ ! -f /usr/local/bin/hypeman-api ] || fail "hypeman-api binary still exists after uninstall" + if launchctl list 2>/dev/null | grep -q com.kernel.hypeman; then + fail "launchd service still loaded after uninstall" + fi +else + [ ! -f /opt/hypeman/bin/hypeman-api ] || fail "hypeman-api binary still exists after uninstall" + if systemctl is-active --quiet hypeman 2>/dev/null; then + fail "systemd service still running after uninstall" + fi +fi +pass "Cleanup verified" + +# ============================================================================= +# Done +# ============================================================================= +echo "" +info "All E2E install tests passed!" 
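+
+# Exit status: the script stops at the first failed check (set -e, and fail()
+# and error() both exit 1), so CI can gate directly on the exit code.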
diff --git a/scripts/install.sh b/scripts/install.sh index 063241f4..0959d66b 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -10,20 +10,15 @@ # CLI_VERSION - Install specific CLI version (default: latest) # BRANCH - Build from source using this branch (for development/testing) # BINARY_DIR - Use binaries from this directory instead of building/downloading -# INSTALL_DIR - Binary installation directory (default: /opt/hypeman/bin) -# DATA_DIR - Data directory (default: /var/lib/hypeman) -# CONFIG_DIR - Config directory (default: /etc/hypeman) +# INSTALL_DIR - Binary installation directory (default: /opt/hypeman/bin on Linux, /usr/local/bin on macOS) +# DATA_DIR - Data directory (default: /var/lib/hypeman on Linux, ~/Library/Application Support/hypeman on macOS) +# CONFIG_DIR - Config directory (default: /etc/hypeman on Linux, ~/.config/hypeman on macOS) # set -e REPO="kernel/hypeman" BINARY_NAME="hypeman-api" -INSTALL_DIR="${INSTALL_DIR:-/opt/hypeman/bin}" -DATA_DIR="${DATA_DIR:-/var/lib/hypeman}" -CONFIG_DIR="${CONFIG_DIR:-/etc/hypeman}" -CONFIG_FILE="${CONFIG_DIR}/config" -SYSTEMD_DIR="/etc/systemd/system" SERVICE_NAME="hypeman" # Colors for output (true color) @@ -45,57 +40,115 @@ find_release_with_artifact() { local archive_prefix="$2" local os="$3" local arch="$4" - + # Fetch recent release tags (up to 10) local tags tags=$(curl -fsSL "https://api.github.com/repos/${repo}/releases?per_page=10" 2>/dev/null | grep '"tag_name"' | cut -d'"' -f4) if [ -z "$tags" ]; then return 1 fi - + # Check each release for the artifact for tag in $tags; do local version_num="${tag#v}" local artifact_name="${archive_prefix}_${version_num}_${os}_${arch}.tar.gz" local artifact_url="https://github.com/${repo}/releases/download/${tag}/${artifact_name}" - + # Check if artifact exists (follow redirects, fail silently) if curl -fsSL --head "$artifact_url" >/dev/null 2>&1; then echo "$tag" return 0 fi done - + return 1 } +# ============================================================================= +# Detect OS and architecture (before pre-flight checks) +# ============================================================================= + +OS=$(uname -s | tr '[:upper:]' '[:lower:]') +ARCH=$(uname -m) +case $ARCH in + x86_64|amd64) + ARCH="amd64" + ;; + aarch64|arm64) + ARCH="arm64" + ;; + *) + error "Unsupported architecture: $ARCH (supported: amd64, arm64)" + ;; +esac + +if [ "$OS" != "linux" ] && [ "$OS" != "darwin" ]; then + error "Unsupported OS: $OS (supported: linux, darwin)" +fi + +# ============================================================================= +# OS-conditional defaults +# ============================================================================= + +if [ "$OS" = "darwin" ]; then + INSTALL_DIR="${INSTALL_DIR:-/usr/local/bin}" + DATA_DIR="${DATA_DIR:-$HOME/Library/Application Support/hypeman}" + CONFIG_DIR="${CONFIG_DIR:-$HOME/.config/hypeman}" +else + INSTALL_DIR="${INSTALL_DIR:-/opt/hypeman/bin}" + DATA_DIR="${DATA_DIR:-/var/lib/hypeman}" + CONFIG_DIR="${CONFIG_DIR:-/etc/hypeman}" +fi + +CONFIG_FILE="${CONFIG_DIR}/config" +SYSTEMD_DIR="/etc/systemd/system" + # ============================================================================= # Pre-flight checks - verify all requirements before doing anything # ============================================================================= info "Running pre-flight checks..." -# Check for root or sudo access SUDO="" -if [ "$EUID" -ne 0 ]; then - if ! command -v sudo >/dev/null 2>&1; then - error "This script requires root privileges. 
Please run as root or install sudo." +if [ "$OS" = "darwin" ]; then + # macOS pre-flight + if [ "$ARCH" != "arm64" ]; then + error "Intel Macs not supported" fi - # Try passwordless sudo first, then prompt from terminal if needed - if ! sudo -n true 2>/dev/null; then - info "Requesting sudo privileges..." - # Read password from /dev/tty (terminal) even when script is piped - if ! sudo -v < /dev/tty; then - error "Failed to obtain sudo privileges" + command -v codesign >/dev/null 2>&1 || error "codesign is required but not installed (install Xcode Command Line Tools)" + command -v docker >/dev/null 2>&1 || error "Docker CLI is required but not found. Install Docker via Colima or Docker Desktop." + # Check if we need sudo for INSTALL_DIR + if [ ! -w "$INSTALL_DIR" ] 2>/dev/null && [ ! -w "$(dirname "$INSTALL_DIR")" ] 2>/dev/null; then + if command -v sudo >/dev/null 2>&1; then + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges (needed for $INSTALL_DIR)..." + if ! sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi + fi + SUDO="sudo" + fi + fi +else + # Linux pre-flight + if [ "$EUID" -ne 0 ]; then + if ! command -v sudo >/dev/null 2>&1; then + error "This script requires root privileges. Please run as root or install sudo." + fi + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges..." + if ! sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi fi + SUDO="sudo" fi - SUDO="sudo" + command -v systemctl >/dev/null 2>&1 || error "systemctl is required but not installed (systemd not available?)" fi -# Check for required commands +# Common checks command -v curl >/dev/null 2>&1 || error "curl is required but not installed" command -v tar >/dev/null 2>&1 || error "tar is required but not installed" -command -v systemctl >/dev/null 2>&1 || error "systemctl is required but not installed (systemd not available?)" command -v openssl >/dev/null 2>&1 || error "openssl is required but not installed" # Count how many of BRANCH, VERSION, BINARY_DIR are set @@ -122,73 +175,56 @@ if [ -n "$BINARY_DIR" ]; then fi fi -# Detect OS -OS=$(uname -s | tr '[:upper:]' '[:lower:]') -if [ "$OS" != "linux" ]; then - error "Hypeman only supports Linux (detected: $OS)" -fi - -# Detect architecture -ARCH=$(uname -m) -case $ARCH in - x86_64|amd64) - ARCH="amd64" - ;; - aarch64|arm64) - ARCH="arm64" - ;; - *) - error "Unsupported architecture: $ARCH (supported: amd64, arm64)" - ;; -esac - info "Pre-flight checks passed" # ============================================================================= # System Configuration - KVM access and network capabilities # ============================================================================= -# Get the installing user (for adding to groups) INSTALL_USER="${SUDO_USER:-$(whoami)}" -# Ensure KVM access -if [ -e /dev/kvm ]; then - if getent group kvm &>/dev/null; then - if ! groups "$INSTALL_USER" 2>/dev/null | grep -qw kvm; then - info "Adding user ${INSTALL_USER} to kvm group..." - $SUDO usermod -aG kvm "$INSTALL_USER" - warn "You may need to log out and back in for kvm group membership to take effect" +if [ "$OS" = "darwin" ]; then + info "macOS uses NAT networking via Virtualization.framework, no system config needed" +else + # Ensure KVM access + if [ -e /dev/kvm ]; then + if getent group kvm &>/dev/null; then + if ! groups "$INSTALL_USER" 2>/dev/null | grep -qw kvm; then + info "Adding user ${INSTALL_USER} to kvm group..." 
+ $SUDO usermod -aG kvm "$INSTALL_USER" + warn "You may need to log out and back in for kvm group membership to take effect" + fi fi + else + warn "/dev/kvm not found - KVM may not be available on this system" fi -else - warn "/dev/kvm not found - KVM may not be available on this system" -fi -# Enable IPv4 forwarding (required for VM networking) -CURRENT_IP_FORWARD=$(sysctl -n net.ipv4.ip_forward 2>/dev/null || echo "0") -if [ "$CURRENT_IP_FORWARD" != "1" ]; then - info "Enabling IPv4 forwarding..." - $SUDO sysctl -w net.ipv4.ip_forward=1 > /dev/null - - # Make it persistent across reboots - if [ -d /etc/sysctl.d ]; then - echo 'net.ipv4.ip_forward=1' | $SUDO tee /etc/sysctl.d/99-hypeman.conf > /dev/null - elif ! grep -q '^net.ipv4.ip_forward=1' /etc/sysctl.conf 2>/dev/null; then - echo 'net.ipv4.ip_forward=1' | $SUDO tee -a /etc/sysctl.conf > /dev/null + # Enable IPv4 forwarding (required for VM networking) + CURRENT_IP_FORWARD=$(sysctl -n net.ipv4.ip_forward 2>/dev/null || echo "0") + if [ "$CURRENT_IP_FORWARD" != "1" ]; then + info "Enabling IPv4 forwarding..." + $SUDO sysctl -w net.ipv4.ip_forward=1 > /dev/null + + # Make it persistent across reboots + if [ -d /etc/sysctl.d ]; then + echo 'net.ipv4.ip_forward=1' | $SUDO tee /etc/sysctl.d/99-hypeman.conf > /dev/null + elif ! grep -q '^net.ipv4.ip_forward=1' /etc/sysctl.conf 2>/dev/null; then + echo 'net.ipv4.ip_forward=1' | $SUDO tee -a /etc/sysctl.conf > /dev/null + fi fi -fi -# Increase file descriptor limit for Caddy (ingress) -if [ -d /etc/security/limits.d ]; then - if [ ! -f /etc/security/limits.d/99-hypeman.conf ]; then - info "Configuring file descriptor limits for ingress..." - $SUDO tee /etc/security/limits.d/99-hypeman.conf > /dev/null << 'LIMITS' + # Increase file descriptor limit for Caddy (ingress) + if [ -d /etc/security/limits.d ]; then + if [ ! -f /etc/security/limits.d/99-hypeman.conf ]; then + info "Configuring file descriptor limits for ingress..." + $SUDO tee /etc/security/limits.d/99-hypeman.conf > /dev/null << 'LIMITS' # Hypeman: Increased file descriptor limits for Caddy ingress * soft nofile 65536 * hard nofile 65536 root soft nofile 65536 root hard nofile 65536 LIMITS + fi fi fi @@ -210,13 +246,22 @@ if [ -n "$BINARY_DIR" ]; then # Copy binaries to TMP_DIR info "Copying binaries from ${BINARY_DIR}..." - for f in "${BINARY_NAME}" "hypeman-token" ".env.example"; do - [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" - done + if [ "$OS" = "darwin" ]; then + for f in "${BINARY_NAME}" "vz-shim" "hypeman-token" ".env.darwin.example"; do + [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" + done + cp "${BINARY_DIR}/vz-shim" "${TMP_DIR}/vz-shim" + cp "${BINARY_DIR}/.env.darwin.example" "${TMP_DIR}/.env.darwin.example" + chmod +x "${TMP_DIR}/vz-shim" + else + for f in "${BINARY_NAME}" "hypeman-token" ".env.example"; do + [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" + done + cp "${BINARY_DIR}/.env.example" "${TMP_DIR}/.env.example" + fi cp "${BINARY_DIR}/${BINARY_NAME}" "${TMP_DIR}/${BINARY_NAME}" cp "${BINARY_DIR}/hypeman-token" "${TMP_DIR}/hypeman-token" - cp "${BINARY_DIR}/.env.example" "${TMP_DIR}/.env.example" # Make binaries executable chmod +x "${TMP_DIR}/${BINARY_NAME}" @@ -226,27 +271,47 @@ if [ -n "$BINARY_DIR" ]; then elif [ -n "$BRANCH" ]; then # Build from source mode info "Building from source (branch: $BRANCH)..." - + BUILD_DIR="${TMP_DIR}/hypeman" BUILD_LOG="${TMP_DIR}/build.log" - + # Clone repo (quiet) if ! 
git clone --branch "$BRANCH" --depth 1 -q "https://github.com/${REPO}.git" "$BUILD_DIR" 2>&1 | tee -a "$BUILD_LOG"; then error "Failed to clone repository. Build log:\n$(cat "$BUILD_LOG")" fi - + info "Building binaries (this may take a few minutes)..." cd "$BUILD_DIR" - - # Build main binary (includes dependencies) - capture output, show on error - if ! make build >> "$BUILD_LOG" 2>&1; then - echo "" - echo -e "${RED}Build failed. Full build log:${NC}" - cat "$BUILD_LOG" - error "Build failed" + + if [ "$OS" = "darwin" ]; then + # macOS: build darwin targets and sign + if ! make build-darwin >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Build failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Build failed" + fi + if ! make sign-darwin >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Signing failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Signing failed" + fi + cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" + cp "bin/vz-shim" "${TMP_DIR}/vz-shim" + cp ".env.darwin.example" "${TMP_DIR}/.env.darwin.example" + else + # Linux: standard build + if ! make build >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Build failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Build failed" + fi + cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" + cp ".env.example" "${TMP_DIR}/.env.example" fi - cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" - + # Build hypeman-token (not included in make build) if ! go build -o "${TMP_DIR}/hypeman-token" ./cmd/gen-jwt >> "$BUILD_LOG" 2>&1; then echo "" @@ -254,13 +319,10 @@ elif [ -n "$BRANCH" ]; then cat "$BUILD_LOG" error "Failed to build hypeman-token" fi - - # Copy .env.example for config template - cp ".env.example" "${TMP_DIR}/.env.example" - + VERSION="$BRANCH (source)" cd - > /dev/null - + info "Build complete" else # Download release mode @@ -285,15 +347,30 @@ else info "Extracting..." tar -xzf "${TMP_DIR}/${ARCHIVE_NAME}" -C "$TMP_DIR" + + # On macOS, codesign after extraction + if [ "$OS" = "darwin" ]; then + info "Signing binaries..." + codesign --force --sign - "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null || true + [ -f "${TMP_DIR}/vz-shim" ] && codesign --force --sign - "${TMP_DIR}/vz-shim" 2>/dev/null || true + fi fi # ============================================================================= # Stop existing service if running # ============================================================================= -if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then - info "Stopping existing ${SERVICE_NAME} service..." - $SUDO systemctl stop "$SERVICE_NAME" +if [ "$OS" = "darwin" ]; then + PLIST_PATH="$HOME/Library/LaunchAgents/com.kernel.hypeman.plist" + if [ -f "$PLIST_PATH" ]; then + info "Stopping existing ${SERVICE_NAME} service..." + launchctl unload "$PLIST_PATH" 2>/dev/null || true + fi +else + if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then + info "Stopping existing ${SERVICE_NAME} service..." + $SUDO systemctl stop "$SERVICE_NAME" + fi fi # ============================================================================= @@ -308,70 +385,184 @@ $SUDO install -m 755 "${TMP_DIR}/${BINARY_NAME}" "${INSTALL_DIR}/${BINARY_NAME}" info "Installing hypeman-token to ${INSTALL_DIR}..." $SUDO install -m 755 "${TMP_DIR}/hypeman-token" "${INSTALL_DIR}/hypeman-token" -# Install wrapper script to /usr/local/bin for easy access -info "Installing hypeman-token wrapper to /usr/local/bin..." 
-$SUDO tee /usr/local/bin/hypeman-token > /dev/null << EOF +# Install vz-shim on macOS +if [ "$OS" = "darwin" ] && [ -f "${TMP_DIR}/vz-shim" ]; then + info "Installing vz-shim to ${INSTALL_DIR}..." + $SUDO install -m 755 "${TMP_DIR}/vz-shim" "${INSTALL_DIR}/vz-shim" +fi + +if [ "$OS" = "linux" ]; then + # Install wrapper script to /usr/local/bin for easy access + info "Installing hypeman-token wrapper to /usr/local/bin..." + $SUDO tee /usr/local/bin/hypeman-token > /dev/null << EOF #!/bin/bash -# Wrapper script for hypeman-token that loads config from /etc/hypeman/config +# Wrapper script for hypeman-token that loads config from ${CONFIG_FILE} set -a source ${CONFIG_FILE} set +a exec ${INSTALL_DIR}/hypeman-token "\$@" EOF -$SUDO chmod 755 /usr/local/bin/hypeman-token + $SUDO chmod 755 /usr/local/bin/hypeman-token +fi # ============================================================================= # Create directories # ============================================================================= info "Creating data directory at ${DATA_DIR}..." -$SUDO mkdir -p "$DATA_DIR" +if [ "$OS" = "darwin" ]; then + mkdir -p "$DATA_DIR" + mkdir -p "$DATA_DIR/logs" +else + $SUDO mkdir -p "$DATA_DIR" +fi info "Creating config directory at ${CONFIG_DIR}..." -$SUDO mkdir -p "$CONFIG_DIR" +if [ "$OS" = "darwin" ]; then + mkdir -p "$CONFIG_DIR" +else + $SUDO mkdir -p "$CONFIG_DIR" +fi # ============================================================================= # Create config file (if it doesn't exist) # ============================================================================= if [ ! -f "$CONFIG_FILE" ]; then - # Get config template (from local build or download from repo) - if [ -f "${TMP_DIR}/.env.example" ]; then - info "Using config template from source..." - cp "${TMP_DIR}/.env.example" "${TMP_DIR}/config" + if [ "$OS" = "darwin" ]; then + # macOS config + if [ -f "${TMP_DIR}/.env.darwin.example" ]; then + info "Using macOS config template from source..." + cp "${TMP_DIR}/.env.darwin.example" "${TMP_DIR}/config" + else + info "Downloading macOS config template..." + CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.darwin.example" + if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then + error "Failed to download config template from ${CONFIG_URL}" + fi + fi + + # Expand ~ to $HOME (launchd doesn't do shell expansion) + sed -i '' "s|~/|${HOME}/|g" "${TMP_DIR}/config" + + # Generate random JWT secret + info "Generating JWT secret..." + JWT_SECRET=$(openssl rand -hex 32) + sed -i '' "s/^JWT_SECRET=.*/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config" + + # Auto-detect Docker socket + DOCKER_SOCKET="" + if [ -n "$DOCKER_HOST" ]; then + DOCKER_SOCKET="${DOCKER_HOST#unix://}" + elif [ -S /var/run/docker.sock ]; then + DOCKER_SOCKET="/var/run/docker.sock" + elif [ -S "$HOME/.colima/default/docker.sock" ]; then + DOCKER_SOCKET="$HOME/.colima/default/docker.sock" + fi + if [ -n "$DOCKER_SOCKET" ]; then + info "Detected Docker socket: ${DOCKER_SOCKET}" + if grep -q '^DOCKER_SOCKET=' "${TMP_DIR}/config"; then + sed -i '' "s|^DOCKER_SOCKET=.*|DOCKER_SOCKET=${DOCKER_SOCKET}|" "${TMP_DIR}/config" + elif grep -q '^# DOCKER_SOCKET=' "${TMP_DIR}/config"; then + sed -i '' "s|^# DOCKER_SOCKET=.*|DOCKER_SOCKET=${DOCKER_SOCKET}|" "${TMP_DIR}/config" + else + echo "DOCKER_SOCKET=${DOCKER_SOCKET}" >> "${TMP_DIR}/config" + fi + fi + + info "Installing config file at ${CONFIG_FILE}..." + install -m 600 "${TMP_DIR}/config" "$CONFIG_FILE" else - info "Downloading config template..." 
-        CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.example"
-        if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then
-            error "Failed to download config template from ${CONFIG_URL}"
+        # Linux config
+        if [ -f "${TMP_DIR}/.env.example" ]; then
+            info "Using config template from source..."
+            cp "${TMP_DIR}/.env.example" "${TMP_DIR}/config"
+        else
+            info "Downloading config template..."
+            CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.example"
+            if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then
+                error "Failed to download config template from ${CONFIG_URL}"
+            fi
         fi
+
+        # Generate random JWT secret
+        info "Generating JWT secret..."
+        JWT_SECRET=$(openssl rand -hex 32)
+        sed -i "s/^JWT_SECRET=$/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config"
+
+        # Set fixed ports for production (instead of random ports used in dev)
+        sed -i "s/^# CADDY_ADMIN_PORT=.*/CADDY_ADMIN_PORT=2019/" "${TMP_DIR}/config"
+        sed -i "s/^# INTERNAL_DNS_PORT=.*/INTERNAL_DNS_PORT=5353/" "${TMP_DIR}/config"
+
+        info "Installing config file at ${CONFIG_FILE}..."
+        $SUDO install -m 640 "${TMP_DIR}/config" "$CONFIG_FILE"
+        $SUDO chown root:root "$CONFIG_FILE"
     fi
-
-    # Generate random JWT secret
-    info "Generating JWT secret..."
-    JWT_SECRET=$(openssl rand -hex 32)
-    sed -i "s/^JWT_SECRET=$/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config"
-
-    # Set fixed ports for production (instead of random ports used in dev)
-    # Replace entire line to avoid trailing comments being included in the value
-    sed -i "s/^# CADDY_ADMIN_PORT=.*/CADDY_ADMIN_PORT=2019/" "${TMP_DIR}/config"
-    sed -i "s/^# INTERNAL_DNS_PORT=.*/INTERNAL_DNS_PORT=5353/" "${TMP_DIR}/config"
-
-    info "Installing config file at ${CONFIG_FILE}..."
-    # Config is 640 root:root - intentionally requires root/sudo to read since it contains JWT_SECRET.
-    # The hypeman service runs as root and the CLI wrapper uses sudo to source the config.
-    $SUDO install -m 640 "${TMP_DIR}/config" "$CONFIG_FILE"
-    $SUDO chown root:root "$CONFIG_FILE"
 else
     info "Config file already exists at ${CONFIG_FILE}, skipping..."
 fi
 
 # =============================================================================
-# Install systemd service
+# Install service
 # =============================================================================
 
-info "Installing systemd service..."
-$SUDO tee "${SYSTEMD_DIR}/${SERVICE_NAME}.service" > /dev/null << EOF
+if [ "$OS" = "darwin" ]; then
+    # macOS: launchd plist
+    PLIST_DIR="$HOME/Library/LaunchAgents"
+    PLIST_PATH="${PLIST_DIR}/com.kernel.hypeman.plist"
+    mkdir -p "$PLIST_DIR"
+
+    info "Installing launchd service..."
+
+    # Build environment variables from config file.
+    # NOTE: values are inserted into the plist verbatim; XML-special
+    # characters in config values are not escaped.
+    ENV_DICT=""
+    if [ -f "$CONFIG_FILE" ]; then
+        while IFS= read -r line; do
+            # Skip comments and empty lines
+            [[ "$line" =~ ^[[:space:]]*# ]] && continue
+            [[ -z "$line" ]] && continue
+            key="${line%%=*}"
+            value="${line#*=}"
+            ENV_DICT="${ENV_DICT}
+        <key>${key}</key>
+        <string>${value}</string>"
+        done < "$CONFIG_FILE"
+    fi
+
+    cat > "$PLIST_PATH" << PLIST
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.kernel.hypeman</string>
+    <key>ProgramArguments</key>
+    <array>
+        <string>${INSTALL_DIR}/${BINARY_NAME}</string>
+    </array>
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>${ENV_DICT}
+    </dict>
+    <key>KeepAlive</key>
+    <true/>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>StandardOutPath</key>
+    <string>${DATA_DIR}/logs/hypeman.log</string>
+    <key>StandardErrorPath</key>
+    <string>${DATA_DIR}/logs/hypeman.log</string>
+</dict>
+</plist>
+PLIST
+
+    info "Loading ${SERVICE_NAME} service..."
+    launchctl load "$PLIST_PATH"
+else
+    # Linux: systemd
+    info "Installing systemd service..."
+ $SUDO tee "${SYSTEMD_DIR}/${SERVICE_NAME}.service" > /dev/null << EOF [Unit] Description=Hypeman API Server Documentation=https://github.com/kernel/hypeman @@ -396,17 +587,42 @@ ReadWritePaths=${DATA_DIR} WantedBy=multi-user.target EOF -# Reload systemd -info "Reloading systemd..." -$SUDO systemctl daemon-reload + info "Reloading systemd..." + $SUDO systemctl daemon-reload + + info "Enabling ${SERVICE_NAME} service..." + $SUDO systemctl enable "$SERVICE_NAME" + + info "Starting ${SERVICE_NAME} service..." + $SUDO systemctl start "$SERVICE_NAME" +fi + +# ============================================================================= +# Build builder image (macOS) +# ============================================================================= -# Enable service -info "Enabling ${SERVICE_NAME} service..." -$SUDO systemctl enable "$SERVICE_NAME" +if [ "$OS" = "darwin" ]; then + info "Attempting to build builder image..." + if command -v docker >/dev/null 2>&1; then + if [ -n "$BRANCH" ] && [ -d "${TMP_DIR}/hypeman" ]; then + BUILD_CONTEXT="${TMP_DIR}/hypeman" + else + BUILD_CONTEXT="" + fi -# Start service -info "Starting ${SERVICE_NAME} service..." -$SUDO systemctl start "$SERVICE_NAME" + if [ -n "$BUILD_CONTEXT" ] && [ -f "${BUILD_CONTEXT}/lib/builds/images/generic/Dockerfile" ]; then + if ! docker build -t hypeman/builder:latest -f "${BUILD_CONTEXT}/lib/builds/images/generic/Dockerfile" "$BUILD_CONTEXT" 2>/dev/null; then + warn "Failed to build builder image. You can build it later manually." + else + info "Builder image built successfully" + fi + else + warn "Builder image Dockerfile not available. Build it manually: docker build -t hypeman/builder:latest -f lib/builds/images/generic/Dockerfile ." + fi + else + warn "Docker not available, skipping builder image build" + fi +fi # ============================================================================= # Install Hypeman CLI @@ -416,7 +632,7 @@ CLI_REPO="kernel/hypeman-cli" if [ -z "$CLI_VERSION" ] || [ "$CLI_VERSION" == "latest" ]; then info "Fetching latest CLI version with available artifacts..." - CLI_VERSION=$(find_release_with_artifact "$CLI_REPO" "hypeman" "$OS" "$ARCH") + CLI_VERSION=$(find_release_with_artifact "$CLI_REPO" "hypeman" "$OS" "$ARCH" || true) if [ -z "$CLI_VERSION" ]; then warn "Failed to find a CLI release with artifacts for ${OS}/${ARCH}, skipping CLI installation" fi @@ -424,24 +640,28 @@ fi if [ -n "$CLI_VERSION" ]; then info "Installing Hypeman CLI version: $CLI_VERSION" - + CLI_VERSION_NUM="${CLI_VERSION#v}" CLI_ARCHIVE_NAME="hypeman_${CLI_VERSION_NUM}_${OS}_${ARCH}.tar.gz" CLI_DOWNLOAD_URL="https://github.com/${CLI_REPO}/releases/download/${CLI_VERSION}/${CLI_ARCHIVE_NAME}" - + info "Downloading CLI ${CLI_ARCHIVE_NAME}..." if curl -fsSL "$CLI_DOWNLOAD_URL" -o "${TMP_DIR}/${CLI_ARCHIVE_NAME}"; then info "Extracting CLI..." mkdir -p "${TMP_DIR}/cli" tar -xzf "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -C "${TMP_DIR}/cli" - - # Install CLI binary - info "Installing hypeman CLI to ${INSTALL_DIR}..." - $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman-cli" - - # Install wrapper script to /usr/local/bin for PATH access - info "Installing hypeman wrapper to /usr/local/bin..." - $SUDO tee /usr/local/bin/hypeman > /dev/null << WRAPPER + + if [ "$OS" = "darwin" ]; then + info "Installing hypeman CLI to ${INSTALL_DIR}..." + install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman" + else + # Install CLI binary + info "Installing hypeman CLI to ${INSTALL_DIR}..." 
+ $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman-cli" + + # Install wrapper script to /usr/local/bin for PATH access + info "Installing hypeman wrapper to /usr/local/bin..." + $SUDO tee /usr/local/bin/hypeman > /dev/null << WRAPPER #!/bin/bash # Wrapper script for hypeman CLI that auto-generates API token set -a @@ -450,7 +670,8 @@ set +a export HYPEMAN_API_KEY=\$(${INSTALL_DIR}/hypeman-token -user-id "cli-user-\$(whoami)" 2>/dev/null) exec ${INSTALL_DIR}/hypeman-cli "\$@" WRAPPER - $SUDO chmod 755 /usr/local/bin/hypeman + $SUDO chmod 755 /usr/local/bin/hypeman + fi else warn "Failed to download CLI from ${CLI_DOWNLOAD_URL}, skipping CLI installation" fi @@ -473,12 +694,25 @@ EOF echo -e "${NC}" info "Hypeman installed successfully!" echo "" -echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" -echo " CLI: /usr/local/bin/hypeman" -echo " Token tool: /usr/local/bin/hypeman-token" -echo " Config: ${CONFIG_FILE}" -echo " Data: ${DATA_DIR}" -echo " Service: ${SERVICE_NAME}.service" + +if [ "$OS" = "darwin" ]; then + echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" + echo " VZ Shim: ${INSTALL_DIR}/vz-shim" + echo " CLI: ${INSTALL_DIR}/hypeman" + echo " Token tool: ${INSTALL_DIR}/hypeman-token" + echo " Config: ${CONFIG_FILE}" + echo " Data: ${DATA_DIR}" + echo " Service: ~/Library/LaunchAgents/com.kernel.hypeman.plist" + echo " Logs: ${DATA_DIR}/logs/hypeman.log" +else + echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" + echo " CLI: /usr/local/bin/hypeman" + echo " Token tool: /usr/local/bin/hypeman-token" + echo " Config: ${CONFIG_FILE}" + echo " Data: ${DATA_DIR}" + echo " Service: ${SERVICE_NAME}.service" +fi + echo "" echo "" echo "Next steps:" diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh index ac45bb42..607b64ef 100755 --- a/scripts/uninstall.sh +++ b/scripts/uninstall.sh @@ -6,16 +6,12 @@ # curl -fsSL https://raw.githubusercontent.com/kernel/hypeman/main/scripts/uninstall.sh | bash # # Options (via environment variables): -# KEEP_DATA=false - Remove data directory (/var/lib/hypeman) - kept by default -# KEEP_CONFIG=true - Keep config directory (/etc/hypeman) +# KEEP_DATA=false - Remove data directory - kept by default +# KEEP_CONFIG=true - Keep config directory # set -e -INSTALL_DIR="/opt/hypeman" -DATA_DIR="/var/lib/hypeman" -CONFIG_DIR="/etc/hypeman" -SYSTEMD_DIR="/etc/systemd/system" SERVICE_NAME="hypeman" SERVICE_USER="hypeman" @@ -30,50 +26,100 @@ info() { echo -e "${GREEN}[INFO]${NC} $1"; } warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; } +# ============================================================================= +# Detect OS +# ============================================================================= + +OS=$(uname -s | tr '[:upper:]' '[:lower:]') +if [ "$OS" != "linux" ] && [ "$OS" != "darwin" ]; then + error "Unsupported OS: $OS (supported: linux, darwin)" +fi + +# ============================================================================= +# OS-conditional defaults +# ============================================================================= + +if [ "$OS" = "darwin" ]; then + INSTALL_DIR="/usr/local/bin" + DATA_DIR="$HOME/Library/Application Support/hypeman" + CONFIG_DIR="$HOME/.config/hypeman" +else + INSTALL_DIR="/opt/hypeman" + DATA_DIR="/var/lib/hypeman" + CONFIG_DIR="/etc/hypeman" +fi + +SYSTEMD_DIR="/etc/systemd/system" + # ============================================================================= # Pre-flight checks # 
============================================================================= info "Running pre-flight checks..." -# Check for root or sudo access SUDO="" -if [ "$EUID" -ne 0 ]; then - if ! command -v sudo >/dev/null 2>&1; then - error "This script requires root privileges. Please run as root or install sudo." +if [ "$OS" = "linux" ]; then + if [ "$EUID" -ne 0 ]; then + if ! command -v sudo >/dev/null 2>&1; then + error "This script requires root privileges. Please run as root or install sudo." + fi + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges..." + if ! sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi + fi + SUDO="sudo" fi - # Try passwordless sudo first, then prompt from terminal if needed - if ! sudo -n true 2>/dev/null; then - info "Requesting sudo privileges..." - if ! sudo -v < /dev/tty; then - error "Failed to obtain sudo privileges" +elif [ "$OS" = "darwin" ]; then + if [ ! -w "$INSTALL_DIR" ] 2>/dev/null; then + if command -v sudo >/dev/null 2>&1; then + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges (needed for $INSTALL_DIR)..." + sudo -v < /dev/tty 2>/dev/null || true + fi + SUDO="sudo" fi fi - SUDO="sudo" fi # ============================================================================= # Stop and disable service # ============================================================================= -if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then - info "Stopping ${SERVICE_NAME} service..." - $SUDO systemctl stop "$SERVICE_NAME" -fi +if [ "$OS" = "darwin" ]; then + PLIST_PATH="$HOME/Library/LaunchAgents/com.kernel.hypeman.plist" + if [ -f "$PLIST_PATH" ]; then + info "Stopping ${SERVICE_NAME} service..." + launchctl unload "$PLIST_PATH" 2>/dev/null || true + fi +else + if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then + info "Stopping ${SERVICE_NAME} service..." + $SUDO systemctl stop "$SERVICE_NAME" + fi -if $SUDO systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then - info "Disabling ${SERVICE_NAME} service..." - $SUDO systemctl disable "$SERVICE_NAME" + if $SUDO systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then + info "Disabling ${SERVICE_NAME} service..." + $SUDO systemctl disable "$SERVICE_NAME" + fi fi # ============================================================================= -# Remove systemd service +# Remove service files # ============================================================================= -if [ -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" ]; then - info "Removing systemd service..." - $SUDO rm -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" - $SUDO systemctl daemon-reload +if [ "$OS" = "darwin" ]; then + if [ -f "$PLIST_PATH" ]; then + info "Removing launchd plist..." + rm -f "$PLIST_PATH" + fi +else + if [ -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" ]; then + info "Removing systemd service..." + $SUDO rm -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" + $SUDO systemctl daemon-reload + fi fi # ============================================================================= @@ -82,13 +128,31 @@ fi info "Removing binaries..." 
-# Remove wrapper scripts from /usr/local/bin -$SUDO rm -f /usr/local/bin/hypeman -$SUDO rm -f /usr/local/bin/hypeman-token +if [ "$OS" = "darwin" ]; then + $SUDO rm -f "${INSTALL_DIR}/hypeman-api" + $SUDO rm -f "${INSTALL_DIR}/vz-shim" + $SUDO rm -f "${INSTALL_DIR}/hypeman-token" + $SUDO rm -f "${INSTALL_DIR}/hypeman" +else + # Remove wrapper scripts from /usr/local/bin + $SUDO rm -f /usr/local/bin/hypeman + $SUDO rm -f /usr/local/bin/hypeman-token + + # Remove install directory + if [ -d "$INSTALL_DIR" ]; then + $SUDO rm -rf "$INSTALL_DIR" + fi +fi + +# ============================================================================= +# Kill orphan vz-shim processes (macOS) +# ============================================================================= -# Remove install directory -if [ -d "$INSTALL_DIR" ]; then - $SUDO rm -rf "$INSTALL_DIR" +if [ "$OS" = "darwin" ]; then + if pgrep -f vz-shim >/dev/null 2>&1; then + info "Killing orphan vz-shim processes..." + pkill -f vz-shim 2>/dev/null || true + fi fi # ============================================================================= @@ -100,7 +164,11 @@ if [ -d "$DATA_DIR" ]; then info "Keeping data directory: ${DATA_DIR}" else info "Removing data directory: ${DATA_DIR}" - $SUDO rm -rf "$DATA_DIR" + if [ "$OS" = "darwin" ]; then + rm -rf "$DATA_DIR" + else + $SUDO rm -rf "$DATA_DIR" + fi fi fi @@ -113,20 +181,26 @@ if [ -d "$CONFIG_DIR" ]; then warn "Keeping config directory: ${CONFIG_DIR}" else info "Removing config directory: ${CONFIG_DIR}" - $SUDO rm -rf "$CONFIG_DIR" + if [ "$OS" = "darwin" ]; then + rm -rf "$CONFIG_DIR" + else + $SUDO rm -rf "$CONFIG_DIR" + fi fi fi # ============================================================================= -# Remove hypeman user +# Remove hypeman user (Linux only) # ============================================================================= -if id "$SERVICE_USER" &>/dev/null; then - if [ "${KEEP_DATA:-true}" = "true" ]; then - info "Keeping system user: ${SERVICE_USER} (data is preserved)" - else - info "Removing system user: ${SERVICE_USER}" - $SUDO userdel "$SERVICE_USER" 2>/dev/null || true +if [ "$OS" = "linux" ]; then + if id "$SERVICE_USER" &>/dev/null; then + if [ "${KEEP_DATA:-true}" = "true" ]; then + info "Keeping system user: ${SERVICE_USER} (data is preserved)" + else + info "Removing system user: ${SERVICE_USER}" + $SUDO userdel "$SERVICE_USER" 2>/dev/null || true + fi fi fi @@ -150,19 +224,33 @@ echo "" if [ "${KEEP_DATA:-true}" = "true" ] && [ -d "$DATA_DIR" ]; then info "Data directory preserved: ${DATA_DIR}" - echo " To remove: sudo rm -rf ${DATA_DIR}" + if [ "$OS" = "darwin" ]; then + echo " To remove: rm -rf \"${DATA_DIR}\"" + else + echo " To remove: sudo rm -rf ${DATA_DIR}" + fi echo "" fi if [ "${KEEP_CONFIG:-false}" = "true" ] && [ -d "$CONFIG_DIR" ]; then info "Config directory preserved: ${CONFIG_DIR}" - echo " To remove: sudo rm -rf ${CONFIG_DIR}" + if [ "$OS" = "darwin" ]; then + echo " To remove: rm -rf \"${CONFIG_DIR}\"" + else + echo " To remove: sudo rm -rf ${CONFIG_DIR}" + fi echo "" fi -warn "Note: Caddy or Cloud Hypervisor processes may still be running." -echo " Check with: ps aux | grep -E 'caddy|cloud-h'" -echo " Kill all: sudo pkill -f caddy; sudo pkill -f cloud-h" +if [ "$OS" = "darwin" ]; then + warn "Note: vz-shim processes may still be running." + echo " Check with: ps aux | grep vz-shim" + echo " Kill all: pkill -f vz-shim" +else + warn "Note: Caddy or Cloud Hypervisor processes may still be running." 
+ echo " Check with: ps aux | grep -E 'caddy|cloud-h'" + echo " Kill all: sudo pkill -f caddy; sudo pkill -f cloud-h" +fi echo "" echo "To reinstall:" diff --git a/vz.entitlements b/vz.entitlements new file mode 100644 index 00000000..41432913 --- /dev/null +++ b/vz.entitlements @@ -0,0 +1,14 @@ + + + + + + com.apple.security.virtualization + + + com.apple.security.network.server + + com.apple.security.network.client + + + From 39178220a7911f565941d3cdbd050086f128f5af Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Tue, 10 Feb 2026 16:48:04 -0500 Subject: [PATCH 03/24] fix: embed vz.entitlements and fix macOS runtime issues - Embed vz.entitlements as a Go resource and write it to a temp file at runtime for codesigning, replacing the broken entitlementsPath() that looked for the file next to the executable - Add vz-shim copy step in .air.darwin.toml so the go:embed directive can find the binary during dev builds - Add --entitlements flag to codesign in install.sh download path so binaries receive the virtualization entitlement - Prepend /opt/homebrew/opt/e2fsprogs/sbin to launchd plist PATH so mkfs.ext4 from keg-only e2fsprogs is found at runtime Co-Authored-By: Claude Opus 4.6 --- .air.darwin.toml | 2 +- lib/hypervisor/vz/starter.go | 28 ++++++++++++++++++---------- lib/hypervisor/vz/vz.entitlements | 12 ++++++++++++ lib/hypervisor/vz/vz_entitlements.go | 8 ++++++++ scripts/install.sh | 24 ++++++++++++++++++++---- 5 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 lib/hypervisor/vz/vz.entitlements create mode 100644 lib/hypervisor/vz/vz_entitlements.go diff --git a/.air.darwin.toml b/.air.darwin.toml index ded73f54..9eb00963 100644 --- a/.air.darwin.toml +++ b/.air.darwin.toml @@ -7,7 +7,7 @@ tmp_dir = "tmp" bin = "./tmp/main" # Build for macOS with vz support, then sign with entitlements # Also builds and signs vz-shim (subprocess that hosts vz VMs) - cmd = "make build-embedded && go build -o ./tmp/vz-shim ./cmd/vz-shim && codesign --sign - --entitlements vz.entitlements --force ./tmp/vz-shim && go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && codesign --sign - --entitlements vz.entitlements --force ./tmp/main" + cmd = "make build-embedded && go build -o ./tmp/vz-shim ./cmd/vz-shim && codesign --sign - --entitlements vz.entitlements --force ./tmp/vz-shim && mkdir -p lib/hypervisor/vz/vz-shim && cp ./tmp/vz-shim lib/hypervisor/vz/vz-shim/vz-shim && go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && codesign --sign - --entitlements vz.entitlements --force ./tmp/main" delay = 1000 exclude_dir = ["assets", "tmp", "vendor", "testdata", "bin", "scripts", "data", "kernel"] exclude_file = [] diff --git a/lib/hypervisor/vz/starter.go b/lib/hypervisor/vz/starter.go index 15ea9847..476a878c 100644 --- a/lib/hypervisor/vz/starter.go +++ b/lib/hypervisor/vz/starter.go @@ -57,8 +57,25 @@ func extractShim() (string, error) { return } + // Write embedded entitlements to a temp file for codesigning + entFile, err := os.CreateTemp("", "vz-entitlements-*.plist") + if err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("create entitlements temp file: %w", err) + return + } + defer os.Remove(entFile.Name()) + + if _, err := entFile.Write(vzEntitlements); err != nil { + os.Remove(f.Name()) + entFile.Close() + shimErr = fmt.Errorf("write entitlements file: %w", err) + return + } + entFile.Close() + // Codesign with entitlements for Virtualization.framework - cmd := exec.Command("codesign", "--sign", "-", "--entitlements", entitlementsPath(), 
"--force", f.Name()) + cmd := exec.Command("codesign", "--sign", "-", "--entitlements", entFile.Name(), "--force", f.Name()) if out, err := cmd.CombinedOutput(); err != nil { os.Remove(f.Name()) shimErr = fmt.Errorf("codesign vz-shim: %s: %w", string(out), err) @@ -70,15 +87,6 @@ func extractShim() (string, error) { return shimPath, shimErr } -// entitlementsPath returns the path to the vz.entitlements file. -func entitlementsPath() string { - exe, err := os.Executable() - if err != nil { - return "vz.entitlements" - } - return filepath.Join(filepath.Dir(exe), "vz.entitlements") -} - // Starter implements hypervisor.VMStarter for Virtualization.framework. type Starter struct{} diff --git a/lib/hypervisor/vz/vz.entitlements b/lib/hypervisor/vz/vz.entitlements new file mode 100644 index 00000000..c5004a80 --- /dev/null +++ b/lib/hypervisor/vz/vz.entitlements @@ -0,0 +1,12 @@ + + + + + com.apple.security.virtualization + + com.apple.security.network.server + + com.apple.security.network.client + + + diff --git a/lib/hypervisor/vz/vz_entitlements.go b/lib/hypervisor/vz/vz_entitlements.go new file mode 100644 index 00000000..82b8bf47 --- /dev/null +++ b/lib/hypervisor/vz/vz_entitlements.go @@ -0,0 +1,8 @@ +//go:build darwin + +package vz + +import _ "embed" + +//go:embed vz.entitlements +var vzEntitlements []byte diff --git a/scripts/install.sh b/scripts/install.sh index 0959d66b..6538fefe 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -348,11 +348,27 @@ else info "Extracting..." tar -xzf "${TMP_DIR}/${ARCHIVE_NAME}" -C "$TMP_DIR" - # On macOS, codesign after extraction + # On macOS, codesign after extraction with virtualization entitlements if [ "$OS" = "darwin" ]; then info "Signing binaries..." - codesign --force --sign - "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null || true - [ -f "${TMP_DIR}/vz-shim" ] && codesign --force --sign - "${TMP_DIR}/vz-shim" 2>/dev/null || true + ENTITLEMENTS_TMP="${TMP_DIR}/vz.entitlements" + cat > "$ENTITLEMENTS_TMP" << 'ENTITLEMENTS' + + + + + com.apple.security.virtualization + + com.apple.security.network.server + + com.apple.security.network.client + + + +ENTITLEMENTS + codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null || true + [ -f "${TMP_DIR}/vz-shim" ] && codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/vz-shim" 2>/dev/null || true + rm -f "$ENTITLEMENTS_TMP" fi fi @@ -543,7 +559,7 @@ if [ "$OS" = "darwin" ]; then EnvironmentVariables PATH - /opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin${ENV_DICT} + /opt/homebrew/opt/e2fsprogs/sbin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin${ENV_DICT} KeepAlive From 59db909959cbc60f80c18fc7f81ff7b563850b62 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Tue, 10 Feb 2026 20:15:41 -0500 Subject: [PATCH 04/24] fix: remove stale disk platform files from rebase disk_darwin.go and disk_linux.go were unified into disk.go in PR #89 but snuck back in during the rebase as new files with no conflicts. 
Co-Authored-By: Claude Opus 4.6 --- lib/images/oci_public.go | 10 ++++++++ lib/resources/disk_darwin.go | 49 ------------------------------------ lib/resources/disk_linux.go | 42 ------------------------------- 3 files changed, 10 insertions(+), 91 deletions(-) delete mode 100644 lib/resources/disk_darwin.go delete mode 100644 lib/resources/disk_linux.go diff --git a/lib/images/oci_public.go b/lib/images/oci_public.go index a7a7c53f..eb87b0db 100644 --- a/lib/images/oci_public.go +++ b/lib/images/oci_public.go @@ -25,6 +25,11 @@ func (c *OCIClient) InspectManifest(ctx context.Context, imageRef string) (strin return c.client.inspectManifest(ctx, imageRef) } +// InspectManifestForLinux is an alias for InspectManifest (all images target Linux) +func (c *OCIClient) InspectManifestForLinux(ctx context.Context, imageRef string) (string, error) { + return c.InspectManifest(ctx, imageRef) +} + // PullAndUnpack pulls an OCI image and unpacks it to a directory (public for system manager). // Always targets Linux platform since hypeman VMs are Linux guests. func (c *OCIClient) PullAndUnpack(ctx context.Context, imageRef, digest, exportDir string) error { @@ -34,3 +39,8 @@ func (c *OCIClient) PullAndUnpack(ctx context.Context, imageRef, digest, exportD } return nil } + +// PullAndUnpackForLinux is an alias for PullAndUnpack (all images target Linux) +func (c *OCIClient) PullAndUnpackForLinux(ctx context.Context, imageRef, digest, exportDir string) error { + return c.PullAndUnpack(ctx, imageRef, digest, exportDir) +} diff --git a/lib/resources/disk_darwin.go b/lib/resources/disk_darwin.go deleted file mode 100644 index 8a5d32f3..00000000 --- a/lib/resources/disk_darwin.go +++ /dev/null @@ -1,49 +0,0 @@ -//go:build darwin - -package resources - -import ( - "os" - - "github.com/c2h5oh/datasize" - "github.com/kernel/hypeman/cmd/api/config" - "github.com/kernel/hypeman/lib/paths" - "golang.org/x/sys/unix" -) - -// NewDiskResource discovers disk capacity on macOS. -func NewDiskResource(cfg *config.Config, p *paths.Paths, instLister InstanceLister, imgLister ImageLister, volLister VolumeLister) (*DiskResource, error) { - var capacity int64 - - if cfg.DiskLimit != "" { - // Parse configured limit - var ds datasize.ByteSize - if err := ds.UnmarshalText([]byte(cfg.DiskLimit)); err != nil { - return nil, err - } - capacity = int64(ds.Bytes()) - } else { - // Auto-detect from filesystem using statfs - var stat unix.Statfs_t - dataDir := cfg.DataDir - if err := unix.Statfs(dataDir, &stat); err != nil { - // Fallback: try to stat the root if data dir doesn't exist yet - if os.IsNotExist(err) { - if err := unix.Statfs("/", &stat); err != nil { - return nil, err - } - } else { - return nil, err - } - } - capacity = int64(stat.Blocks) * int64(stat.Bsize) - } - - return &DiskResource{ - capacity: capacity, - dataDir: cfg.DataDir, - instanceLister: instLister, - imageLister: imgLister, - volumeLister: volLister, - }, nil -} diff --git a/lib/resources/disk_linux.go b/lib/resources/disk_linux.go deleted file mode 100644 index e6cc8fb1..00000000 --- a/lib/resources/disk_linux.go +++ /dev/null @@ -1,42 +0,0 @@ -//go:build linux - -package resources - -import ( - "syscall" - - "github.com/c2h5oh/datasize" - "github.com/kernel/hypeman/cmd/api/config" - "github.com/kernel/hypeman/lib/paths" -) - -// NewDiskResource discovers disk capacity for the data directory. -// If cfg.DiskLimit is set, uses that as capacity; otherwise auto-detects via statfs. 
-func NewDiskResource(cfg *config.Config, p *paths.Paths, instLister InstanceLister, imgLister ImageLister, volLister VolumeLister) (*DiskResource, error) { - var capacity int64 - - if cfg.DiskLimit != "" { - // Parse configured limit - var ds datasize.ByteSize - if err := ds.UnmarshalText([]byte(cfg.DiskLimit)); err != nil { - return nil, err - } - capacity = int64(ds.Bytes()) - } else { - // Auto-detect from filesystem - var stat syscall.Statfs_t - if err := syscall.Statfs(cfg.DataDir, &stat); err != nil { - return nil, err - } - // Total space = blocks * block size - capacity = int64(stat.Blocks) * int64(stat.Bsize) - } - - return &DiskResource{ - capacity: capacity, - dataDir: cfg.DataDir, - instanceLister: instLister, - imageLister: imgLister, - volumeLister: volLister, - }, nil -} From 6dcf6272f74378fbcde023f66dd67175bec5ee99 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 11 Feb 2026 15:21:06 -0500 Subject: [PATCH 05/24] fix: vsock proxy data loss, zombie reaping, and remove vz-shim from install - Read from bufio.Reader instead of raw conn in vsock proxy to prevent silent data loss when the buffered reader consumed beyond the newline - Replace cmd.Process.Release() with go cmd.Wait() to properly reap vz-shim child processes instead of leaving zombies - Update hypervisor README to reflect vz subprocess model (not in-process) - Remove vz-shim from install/uninstall scripts (it's embedded in hypeman-api and extracted at runtime) - Add CLI smoke tests (hypeman ps, hypeman images) to e2e install test Co-Authored-By: Claude Opus 4.6 --- cmd/vz-shim/server.go | 2 +- lib/hypervisor/README.md | 8 +++---- lib/hypervisor/vz/starter.go | 2 +- scripts/e2e-install-test.sh | 45 +++++++++++++++++++++++++++++++++++- scripts/install.sh | 36 +++++++---------------------- scripts/uninstall.sh | 15 +----------- 6 files changed, 59 insertions(+), 49 deletions(-) diff --git a/cmd/vz-shim/server.go b/cmd/vz-shim/server.go index 029827e7..38c29a40 100644 --- a/cmd/vz-shim/server.go +++ b/cmd/vz-shim/server.go @@ -246,7 +246,7 @@ func (s *ShimServer) handleVsockConnection(conn net.Conn) { done := make(chan struct{}, 2) go func() { - io.Copy(guestConn, conn) + io.Copy(guestConn, reader) done <- struct{}{} }() diff --git a/lib/hypervisor/README.md b/lib/hypervisor/README.md index 3eafd673..11ceccd1 100644 --- a/lib/hypervisor/README.md +++ b/lib/hypervisor/README.md @@ -17,7 +17,7 @@ Hypeman originally supported only Cloud Hypervisor. This abstraction layer allow |------------|----------|---------------|-------------------| | Cloud Hypervisor | Linux | External process | HTTP API over Unix socket | | QEMU | Linux | External process | QMP over Unix socket | -| vz | macOS | In-process | Direct API calls | +| vz | macOS | Subprocess (vz-shim) | gRPC over Unix socket | ## How It Works @@ -26,7 +26,7 @@ The abstraction defines two key interfaces: 1. **Hypervisor** - VM lifecycle operations (create, boot, pause, resume, snapshot, restore, shutdown) 2. **VMStarter** - VM startup and configuration (start binary, get binary path) -Each implementation translates generic configuration to its native format. Cloud Hypervisor and QEMU run as external processes with socket-based control. The vz implementation runs VMs in-process using Apple's Virtualization.framework. +Each implementation translates generic configuration to its native format. Cloud Hypervisor and QEMU run as external processes with socket-based control. 
The vz implementation runs VMs as separate vz-shim subprocesses using Apple's Virtualization.framework. Before using optional features, callers check capabilities: @@ -44,8 +44,8 @@ if hv.Capabilities().SupportsSnapshot { - TAP devices and Linux bridges for networking ### macOS (vz) -- VMs run in-process (no separate PID) -- VMs stop if hypeman stops (cannot reconnect) +- VMs run as separate vz-shim subprocesses (detached process group) +- State persists across hypeman restarts (reconnect via socket) - NAT networking via Virtualization.framework - Requires code signing with virtualization entitlement diff --git a/lib/hypervisor/vz/starter.go b/lib/hypervisor/vz/starter.go index 476a878c..90a7b5e6 100644 --- a/lib/hypervisor/vz/starter.go +++ b/lib/hypervisor/vz/starter.go @@ -178,7 +178,7 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s return 0, nil, fmt.Errorf("connect to vz-shim: %w", err) } - cmd.Process.Release() + go cmd.Wait() return pid, client, nil } diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index 41a506dd..d8022a2b 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -81,7 +81,6 @@ info "Phase 4: Validating installation..." # Check binaries if [ "$OS" = "darwin" ]; then [ -x /usr/local/bin/hypeman-api ] || fail "hypeman-api binary not found" - [ -x /usr/local/bin/vz-shim ] || fail "vz-shim binary not found" pass "Binaries installed correctly" # Check launchd service @@ -110,6 +109,50 @@ else fi pass "Config file exists" +# ============================================================================= +# Phase 4b: Testing CLI commands +# ============================================================================= +info "Phase 4b: Testing CLI commands..." + +# Determine config file path +if [ "$OS" = "darwin" ]; then + CONFIG_FILE="$HOME/.config/hypeman/config" +else + CONFIG_FILE="/etc/hypeman/config" +fi + +# Source config to get JWT_SECRET and PORT +set -a +source "$CONFIG_FILE" +set +a + +# Generate API token using hypeman-token +if [ "$OS" = "darwin" ]; then + API_KEY=$("/usr/local/bin/hypeman-token" -user-id "e2e-test-user") +else + API_KEY=$("/opt/hypeman/bin/hypeman-token" -user-id "e2e-test-user") +fi +[ -n "$API_KEY" ] || fail "Failed to generate API token" +pass "Generated API token" + +# Set CLI env +export HYPEMAN_API_KEY="$API_KEY" +export HYPEMAN_BASE_URL="http://localhost:${PORT:-8080}" + +# Determine CLI path +if [ "$OS" = "darwin" ]; then + HYPEMAN_CMD="/usr/local/bin/hypeman" +else + HYPEMAN_CMD="/usr/local/bin/hypeman" +fi + +# Test CLI commands +$HYPEMAN_CMD ps || fail "hypeman ps failed" +pass "hypeman ps works" + +$HYPEMAN_CMD images || fail "hypeman images failed" +pass "hypeman images works" + # ============================================================================= # Phase 5: Cleanup # ============================================================================= diff --git a/scripts/install.sh b/scripts/install.sh index 6538fefe..e44e664c 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -247,12 +247,10 @@ if [ -n "$BINARY_DIR" ]; then info "Copying binaries from ${BINARY_DIR}..." 
if [ "$OS" = "darwin" ]; then - for f in "${BINARY_NAME}" "vz-shim" "hypeman-token" ".env.darwin.example"; do + for f in "${BINARY_NAME}" "hypeman-token" ".env.darwin.example"; do [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" done - cp "${BINARY_DIR}/vz-shim" "${TMP_DIR}/vz-shim" cp "${BINARY_DIR}/.env.darwin.example" "${TMP_DIR}/.env.darwin.example" - chmod +x "${TMP_DIR}/vz-shim" else for f in "${BINARY_NAME}" "hypeman-token" ".env.example"; do [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" @@ -283,34 +281,24 @@ elif [ -n "$BRANCH" ]; then info "Building binaries (this may take a few minutes)..." cd "$BUILD_DIR" + if ! make build >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Build failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Build failed" + fi if [ "$OS" = "darwin" ]; then - # macOS: build darwin targets and sign - if ! make build-darwin >> "$BUILD_LOG" 2>&1; then - echo "" - echo -e "${RED}Build failed. Full build log:${NC}" - cat "$BUILD_LOG" - error "Build failed" - fi if ! make sign-darwin >> "$BUILD_LOG" 2>&1; then echo "" echo -e "${RED}Signing failed. Full build log:${NC}" cat "$BUILD_LOG" error "Signing failed" fi - cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" - cp "bin/vz-shim" "${TMP_DIR}/vz-shim" cp ".env.darwin.example" "${TMP_DIR}/.env.darwin.example" else - # Linux: standard build - if ! make build >> "$BUILD_LOG" 2>&1; then - echo "" - echo -e "${RED}Build failed. Full build log:${NC}" - cat "$BUILD_LOG" - error "Build failed" - fi - cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" cp ".env.example" "${TMP_DIR}/.env.example" fi + cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" # Build hypeman-token (not included in make build) if ! go build -o "${TMP_DIR}/hypeman-token" ./cmd/gen-jwt >> "$BUILD_LOG" 2>&1; then @@ -367,7 +355,6 @@ else ENTITLEMENTS codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null || true - [ -f "${TMP_DIR}/vz-shim" ] && codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/vz-shim" 2>/dev/null || true rm -f "$ENTITLEMENTS_TMP" fi fi @@ -401,12 +388,6 @@ $SUDO install -m 755 "${TMP_DIR}/${BINARY_NAME}" "${INSTALL_DIR}/${BINARY_NAME}" info "Installing hypeman-token to ${INSTALL_DIR}..." $SUDO install -m 755 "${TMP_DIR}/hypeman-token" "${INSTALL_DIR}/hypeman-token" -# Install vz-shim on macOS -if [ "$OS" = "darwin" ] && [ -f "${TMP_DIR}/vz-shim" ]; then - info "Installing vz-shim to ${INSTALL_DIR}..." - $SUDO install -m 755 "${TMP_DIR}/vz-shim" "${INSTALL_DIR}/vz-shim" -fi - if [ "$OS" = "linux" ]; then # Install wrapper script to /usr/local/bin for easy access info "Installing hypeman-token wrapper to /usr/local/bin..." @@ -713,7 +694,6 @@ echo "" if [ "$OS" = "darwin" ]; then echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" - echo " VZ Shim: ${INSTALL_DIR}/vz-shim" echo " CLI: ${INSTALL_DIR}/hypeman" echo " Token tool: ${INSTALL_DIR}/hypeman-token" echo " Config: ${CONFIG_FILE}" diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh index 607b64ef..ccf9d673 100755 --- a/scripts/uninstall.sh +++ b/scripts/uninstall.sh @@ -130,7 +130,6 @@ info "Removing binaries..." 
if [ "$OS" = "darwin" ]; then $SUDO rm -f "${INSTALL_DIR}/hypeman-api" - $SUDO rm -f "${INSTALL_DIR}/vz-shim" $SUDO rm -f "${INSTALL_DIR}/hypeman-token" $SUDO rm -f "${INSTALL_DIR}/hypeman" else @@ -144,17 +143,6 @@ else fi fi -# ============================================================================= -# Kill orphan vz-shim processes (macOS) -# ============================================================================= - -if [ "$OS" = "darwin" ]; then - if pgrep -f vz-shim >/dev/null 2>&1; then - info "Killing orphan vz-shim processes..." - pkill -f vz-shim 2>/dev/null || true - fi -fi - # ============================================================================= # Handle data directory # ============================================================================= @@ -243,9 +231,8 @@ if [ "${KEEP_CONFIG:-false}" = "true" ] && [ -d "$CONFIG_DIR" ]; then fi if [ "$OS" = "darwin" ]; then - warn "Note: vz-shim processes may still be running." + warn "Note: vz-shim processes managed by hypeman may still be running." echo " Check with: ps aux | grep vz-shim" - echo " Kill all: pkill -f vz-shim" else warn "Note: Caddy or Cloud Hypervisor processes may still be running." echo " Check with: ps aux | grep -E 'caddy|cloud-h'" From 570d99cd19665712859fc65328f8b65cced47366 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 11 Feb 2026 15:24:36 -0500 Subject: [PATCH 06/24] fix: e2e test config sourcing and missing CLI handling - Extract JWT_SECRET/PORT with grep instead of sourcing the config file, which breaks on macOS where paths contain spaces - Skip CLI smoke tests gracefully when CLI binary is not installed (e.g., no darwin/arm64 release available) Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index d8022a2b..089e26ff 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -121,10 +121,10 @@ else CONFIG_FILE="/etc/hypeman/config" fi -# Source config to get JWT_SECRET and PORT -set -a -source "$CONFIG_FILE" -set +a +# Extract JWT_SECRET and PORT from config (source is unsafe — values may contain spaces) +JWT_SECRET=$(grep '^JWT_SECRET=' "$CONFIG_FILE" | cut -d= -f2-) +PORT=$(grep '^PORT=' "$CONFIG_FILE" | cut -d= -f2-) +export JWT_SECRET # Generate API token using hypeman-token if [ "$OS" = "darwin" ]; then @@ -146,12 +146,16 @@ else HYPEMAN_CMD="/usr/local/bin/hypeman" fi -# Test CLI commands -$HYPEMAN_CMD ps || fail "hypeman ps failed" -pass "hypeman ps works" +# Test CLI commands (skip if CLI was not installed) +if [ -x "$HYPEMAN_CMD" ]; then + $HYPEMAN_CMD ps || fail "hypeman ps failed" + pass "hypeman ps works" -$HYPEMAN_CMD images || fail "hypeman images failed" -pass "hypeman images works" + $HYPEMAN_CMD images || fail "hypeman images failed" + pass "hypeman images works" +else + warn "CLI not installed, skipping CLI smoke tests" +fi # ============================================================================= # Phase 5: Cleanup From 53ac4ca3dd34407ac4d98088105bc17cc1686c7e Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 11 Feb 2026 16:21:14 -0500 Subject: [PATCH 07/24] fix: remove unused registry-push flag from gen-jwt Builder images are now auto-built on startup, so manual push workflow and the -registry-push flag are no longer needed. The underlying repo_access JWT infrastructure remains for other registry auth flows. 
Co-Authored-By: Claude Opus 4.6 --- DEVELOPMENT.md | 36 +----------------------------------- cmd/gen-jwt/main.go | 9 --------- 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 857cb954..9063a907 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -257,47 +257,13 @@ The server will start on port 8080 (configurable via `PORT` environment variable ### Setting Up the Builder Image (for Dockerfile builds) -The builder image is required for `hypeman build` to work. There are two modes: - -**Automatic mode (default):** When `BUILDER_IMAGE` is unset or empty, the server will automatically build and push the builder image on startup using Docker. This is the easiest way to get started — just ensure Docker is available and run `make dev`. If a build is requested while the builder image is still being prepared, the server returns a clear error asking you to retry shortly. +The builder image is required for `hypeman build` to work. When `BUILDER_IMAGE` is unset or empty, the server will automatically build and push the builder image on startup using Docker. This is the easiest way to get started — just ensure Docker is available and run `make dev`. If a build is requested while the builder image is still being prepared, the server returns a clear error asking you to retry shortly. On macOS with Colima, set the Docker socket path: ```bash DOCKER_SOCKET=$HOME/.colima/default/docker.sock ``` -**Manual mode:** When `BUILDER_IMAGE` is explicitly set, the server assumes you manage your own image. Follow these steps: - -1. **Build the builder image** (requires Docker): - ```bash - docker build -t hypeman/builder:latest -f lib/builds/images/generic/Dockerfile . - ``` - -2. **Start the Hypeman server** (if not already running): - ```bash - make dev - ``` - -3. **Push to Hypeman's internal registry**: - ```bash - # Generate a token with registry push permissions - export JWT_SECRET="dev-secret-for-local-testing" - export HYPEMAN_API_KEY=$(go run ./cmd/gen-jwt -registry-push "hypeman/builder") - export HYPEMAN_BASE_URL="http://localhost:8080" - - # Push using hypeman-cli - hypeman push hypeman/builder:latest - ``` - -4. **Configure the builder image** in `.env`: - ```bash - BUILDER_IMAGE=localhost:8080/hypeman/builder:latest - ``` - -5. **Restart the server** to pick up the new config. - -Now `hypeman build ` will work for Dockerfile-based builds. 
- ### Local OpenTelemetry (optional) To collect traces and metrics locally, run the Grafana LGTM stack (Loki, Grafana, Tempo, Mimir): diff --git a/cmd/gen-jwt/main.go b/cmd/gen-jwt/main.go index 7fa2c03f..a14cd409 100644 --- a/cmd/gen-jwt/main.go +++ b/cmd/gen-jwt/main.go @@ -16,7 +16,6 @@ func main() { os.Exit(1) } userID := flag.String("user-id", "test-user", "User ID to include in the JWT token") - registryPush := flag.String("registry-push", "", "Repository to grant push access to (e.g., hypeman/builder)") flag.Parse() claims := jwt.MapClaims{ @@ -24,14 +23,6 @@ func main() { "iat": time.Now().Unix(), "exp": time.Now().Add(24 * time.Hour).Unix(), } - - // Add registry push permissions if requested - if *registryPush != "" { - claims["repo_access"] = []map[string]string{ - {"repo": *registryPush, "scope": "push"}, - } - } - token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) tokenString, err := token.SignedString([]byte(jwtSecret)) if err != nil { From a185e2cbae06ae83e4f97c2d7ed87a5b04563df6 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 11 Feb 2026 16:33:53 -0500 Subject: [PATCH 08/24] fix: remove unnecessary .gitkeep for vz-shim embed dir The vz-shim embed is darwin-only (build tag), so the directory isn't needed on Linux. On macOS the Makefile creates it before compiling. Co-Authored-By: Claude Opus 4.6 --- lib/hypervisor/vz/vz-shim/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 lib/hypervisor/vz/vz-shim/.gitkeep diff --git a/lib/hypervisor/vz/vz-shim/.gitkeep b/lib/hypervisor/vz/vz-shim/.gitkeep deleted file mode 100644 index e69de29b..00000000 From a3fc0154b48dba5aa6b0d3c1af56ee166e0692c3 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 11 Feb 2026 16:38:26 -0500 Subject: [PATCH 09/24] feat: add VM lifecycle smoke test to e2e install test Tests pull, run, exec, stop, and rm using the CLI against a real alpine VM to verify the full stack works end-to-end after install. 
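The readiness checks in the script all reduce to the same
poll-until-success loop. For Go-side tests the equivalent helper looks
roughly like this (a sketch only; the helper name is hypothetical and
nothing in this patch adds it):

```go
package e2e

import (
	"fmt"
	"time"
)

// waitFor polls fn until it succeeds or the timeout elapses, mirroring
// the retry loops the shell script wraps around exec readiness.
func waitFor(timeout, interval time.Duration, fn func() error) error {
	deadline := time.Now().Add(timeout)
	for {
		err := fn()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("condition not met within %s: %w", timeout, err)
		}
		time.Sleep(interval)
	}
}
```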
Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index 089e26ff..327b88e4 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -153,6 +153,36 @@ if [ -x "$HYPEMAN_CMD" ]; then $HYPEMAN_CMD images || fail "hypeman images failed" pass "hypeman images works" + + # VM lifecycle test + E2E_VM_NAME="e2e-test-vm" + + $HYPEMAN_CMD pull alpine:latest || fail "hypeman pull failed" + pass "hypeman pull works" + + $HYPEMAN_CMD run --name "$E2E_VM_NAME" alpine:latest || fail "hypeman run failed" + pass "hypeman run works" + + # Wait for VM to be ready + VM_READY=false + for i in $(seq 1 30); do + if $HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello" >/dev/null 2>&1; then + VM_READY=true + break + fi + sleep 2 + done + [ "$VM_READY" = true ] || fail "VM did not become ready within 60s" + + OUTPUT=$($HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello from e2e") || fail "hypeman exec failed" + echo "$OUTPUT" | grep -q "hello from e2e" || fail "hypeman exec output mismatch: $OUTPUT" + pass "hypeman exec works" + + $HYPEMAN_CMD stop "$E2E_VM_NAME" || fail "hypeman stop failed" + pass "hypeman stop works" + + $HYPEMAN_CMD rm "$E2E_VM_NAME" || fail "hypeman rm failed" + pass "hypeman rm works" else warn "CLI not installed, skipping CLI smoke tests" fi From c326356aeb1f77d6e96a1416ea846cce10702e7f Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 11 Feb 2026 16:43:15 -0500 Subject: [PATCH 10/24] fix: create vz-shim embed directory in Makefile before copying The .gitkeep was removed so the directory no longer exists in the repo. The Makefile needs to mkdir -p before copying the built binary. Co-Authored-By: Claude Opus 4.6 --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 06336dc3..b73bd05d 100644 --- a/Makefile +++ b/Makefile @@ -297,6 +297,7 @@ ENTITLEMENTS_FILE ?= vz.entitlements build-vz-shim: | $(BIN_DIR) @echo "Building vz-shim for macOS..." go build -o $(BIN_DIR)/vz-shim ./cmd/vz-shim + mkdir -p lib/hypervisor/vz/vz-shim cp $(BIN_DIR)/vz-shim lib/hypervisor/vz/vz-shim/vz-shim @echo "Build complete: $(BIN_DIR)/vz-shim" From 732b06c9f33b1b86118e9a47fd299eb510685782 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 12 Feb 2026 20:44:04 +0000 Subject: [PATCH 11/24] Fix macOS CLI install missing $SUDO prefix The macOS CLI install on line 653 used bare 'install' while all other binary installs to $INSTALL_DIR used '$SUDO install'. When /usr/local/bin isn't writable and $SUDO is set to 'sudo', this caused a permission error that aborted the script (due to set -e) after the service was already running, leaving a partial installation. Applied via @cursor push command --- scripts/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install.sh b/scripts/install.sh index e44e664c..1758759f 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -650,7 +650,7 @@ if [ -n "$CLI_VERSION" ]; then if [ "$OS" = "darwin" ]; then info "Installing hypeman CLI to ${INSTALL_DIR}..." - install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman" + $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman" else # Install CLI binary info "Installing hypeman CLI to ${INSTALL_DIR}..." 
From 78c9665b06a1a58cc8098ef8479683b654e55f23 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 12 Feb 2026 15:45:20 -0500 Subject: [PATCH 12/24] fix: CLI install on macOS and make CLI a hard requirement in e2e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLI releases use goreleaser naming ("macos" not "darwin", .zip not .tar.gz). Fix artifact lookup and extraction to handle both formats. Make CLI presence a hard fail in e2e test — if the install script can't install the CLI, that's a real failure. Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 63 ++++++++++++++++++------------------- scripts/install.sh | 26 +++++++++++---- 2 files changed, 51 insertions(+), 38 deletions(-) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index 327b88e4..a8e1eadc 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -146,46 +146,45 @@ else HYPEMAN_CMD="/usr/local/bin/hypeman" fi -# Test CLI commands (skip if CLI was not installed) -if [ -x "$HYPEMAN_CMD" ]; then - $HYPEMAN_CMD ps || fail "hypeman ps failed" - pass "hypeman ps works" +# Verify CLI was installed +[ -x "$HYPEMAN_CMD" ] || fail "hypeman CLI not found at $HYPEMAN_CMD" +pass "CLI installed" - $HYPEMAN_CMD images || fail "hypeman images failed" - pass "hypeman images works" +$HYPEMAN_CMD ps || fail "hypeman ps failed" +pass "hypeman ps works" - # VM lifecycle test - E2E_VM_NAME="e2e-test-vm" +$HYPEMAN_CMD images || fail "hypeman images failed" +pass "hypeman images works" - $HYPEMAN_CMD pull alpine:latest || fail "hypeman pull failed" - pass "hypeman pull works" +# VM lifecycle test +E2E_VM_NAME="e2e-test-vm" - $HYPEMAN_CMD run --name "$E2E_VM_NAME" alpine:latest || fail "hypeman run failed" - pass "hypeman run works" +$HYPEMAN_CMD pull alpine:latest || fail "hypeman pull failed" +pass "hypeman pull works" - # Wait for VM to be ready - VM_READY=false - for i in $(seq 1 30); do - if $HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello" >/dev/null 2>&1; then - VM_READY=true - break - fi - sleep 2 - done - [ "$VM_READY" = true ] || fail "VM did not become ready within 60s" +$HYPEMAN_CMD run --name "$E2E_VM_NAME" alpine:latest || fail "hypeman run failed" +pass "hypeman run works" + +# Wait for VM to be ready +VM_READY=false +for i in $(seq 1 30); do + if $HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello" >/dev/null 2>&1; then + VM_READY=true + break + fi + sleep 2 +done +[ "$VM_READY" = true ] || fail "VM did not become ready within 60s" - OUTPUT=$($HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello from e2e") || fail "hypeman exec failed" - echo "$OUTPUT" | grep -q "hello from e2e" || fail "hypeman exec output mismatch: $OUTPUT" - pass "hypeman exec works" +OUTPUT=$($HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello from e2e") || fail "hypeman exec failed" +echo "$OUTPUT" | grep -q "hello from e2e" || fail "hypeman exec output mismatch: $OUTPUT" +pass "hypeman exec works" - $HYPEMAN_CMD stop "$E2E_VM_NAME" || fail "hypeman stop failed" - pass "hypeman stop works" +$HYPEMAN_CMD stop "$E2E_VM_NAME" || fail "hypeman stop failed" +pass "hypeman stop works" - $HYPEMAN_CMD rm "$E2E_VM_NAME" || fail "hypeman rm failed" - pass "hypeman rm works" -else - warn "CLI not installed, skipping CLI smoke tests" -fi +$HYPEMAN_CMD rm "$E2E_VM_NAME" || fail "hypeman rm failed" +pass "hypeman rm works" # ============================================================================= # Phase 5: Cleanup diff --git a/scripts/install.sh b/scripts/install.sh index 
1758759f..c1382e10 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -33,13 +33,14 @@ warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
 error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; }
 
 # Find the most recent release that has a specific artifact available
-# Usage: find_release_with_artifact <repo> <archive_prefix> <os> <arch>
+# Usage: find_release_with_artifact <repo> <archive_prefix> <os> <arch> [ext]
 # Returns: version tag (e.g., v0.5.0) or empty string if not found
 find_release_with_artifact() {
 local repo="$1"
 local archive_prefix="$2"
 local os="$3"
 local arch="$4"
+ local ext="${5:-tar.gz}"
 
 # Fetch recent release tags (up to 10)
 local tags
@@ -51,7 +52,7 @@ find_release_with_artifact() {
 # Check each release for the artifact
 for tag in $tags; do
 local version_num="${tag#v}"
- local artifact_name="${archive_prefix}_${version_num}_${os}_${arch}.tar.gz"
+ local artifact_name="${archive_prefix}_${version_num}_${os}_${arch}.${ext}"
 local artifact_url="https://github.com/${repo}/releases/download/${tag}/${artifact_name}"
 
 # Check if artifact exists (follow redirects, fail silently)
@@ -627,11 +628,20 @@ fi
 
 CLI_REPO="kernel/hypeman-cli"
 
+# CLI releases use goreleaser naming: "macos" not "darwin", .zip not .tar.gz on macOS
+if [ "$OS" = "darwin" ]; then
+ CLI_OS="macos"
+ CLI_EXT="zip"
+else
+ CLI_OS="$OS"
+ CLI_EXT="tar.gz"
+fi
+
 if [ -z "$CLI_VERSION" ] || [ "$CLI_VERSION" == "latest" ]; then
 info "Fetching latest CLI version with available artifacts..."
- CLI_VERSION=$(find_release_with_artifact "$CLI_REPO" "hypeman" "$OS" "$ARCH" || true)
+ CLI_VERSION=$(find_release_with_artifact "$CLI_REPO" "hypeman" "$CLI_OS" "$ARCH" "$CLI_EXT" || true)
 if [ -z "$CLI_VERSION" ]; then
- warn "Failed to find a CLI release with artifacts for ${OS}/${ARCH}, skipping CLI installation"
+ warn "Failed to find a CLI release with artifacts for ${CLI_OS}/${ARCH}, skipping CLI installation"
 fi
 fi
 
@@ -639,14 +649,18 @@ if [ -n "$CLI_VERSION" ]; then
 info "Installing Hypeman CLI version: $CLI_VERSION"
 
 CLI_VERSION_NUM="${CLI_VERSION#v}"
- CLI_ARCHIVE_NAME="hypeman_${CLI_VERSION_NUM}_${OS}_${ARCH}.tar.gz"
+ CLI_ARCHIVE_NAME="hypeman_${CLI_VERSION_NUM}_${CLI_OS}_${ARCH}.${CLI_EXT}"
 CLI_DOWNLOAD_URL="https://github.com/${CLI_REPO}/releases/download/${CLI_VERSION}/${CLI_ARCHIVE_NAME}"
 
 info "Downloading CLI ${CLI_ARCHIVE_NAME}..."
 if curl -fsSL "$CLI_DOWNLOAD_URL" -o "${TMP_DIR}/${CLI_ARCHIVE_NAME}"; then
 info "Extracting CLI..."
 mkdir -p "${TMP_DIR}/cli"
- tar -xzf "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -C "${TMP_DIR}/cli"
+ if [ "$CLI_EXT" = "zip" ]; then
+ unzip -qo "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -d "${TMP_DIR}/cli"
+ else
+ tar -xzf "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -C "${TMP_DIR}/cli"
+ fi
 
 if [ "$OS" = "darwin" ]; then
 info "Installing hypeman CLI to ${INSTALL_DIR}..."
From 2c9eb6707d210c32478c17030278adc9b8581505 Mon Sep 17 00:00:00 2001
From: Rafael Garcia 
Date: Thu, 12 Feb 2026 15:48:16 -0500
Subject: [PATCH 13/24] fix: remove nonexistent 'hypeman images' from e2e test

The CLI doesn't have an 'images' subcommand. The VM lifecycle tests
(pull, run, exec, stop, rm) cover real functionality.
Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index a8e1eadc..dc48d78d 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -153,9 +153,6 @@ pass "CLI installed" $HYPEMAN_CMD ps || fail "hypeman ps failed" pass "hypeman ps works" -$HYPEMAN_CMD images || fail "hypeman images failed" -pass "hypeman images works" - # VM lifecycle test E2E_VM_NAME="e2e-test-vm" From 8c8c3f2f1be946b5e8a3f291061bfbb7dc788654 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 12 Feb 2026 15:51:20 -0500 Subject: [PATCH 14/24] fix: retry hypeman run after async pull in e2e test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Image pulls are async — 'hypeman pull' returns immediately with status:pending. Retry 'hypeman run' in a loop until the image is available. Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index dc48d78d..6dcd9002 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -159,7 +159,16 @@ E2E_VM_NAME="e2e-test-vm" $HYPEMAN_CMD pull alpine:latest || fail "hypeman pull failed" pass "hypeman pull works" -$HYPEMAN_CMD run --name "$E2E_VM_NAME" alpine:latest || fail "hypeman run failed" +# Wait for image to be available (pull is async) +IMAGE_READY=false +for i in $(seq 1 30); do + if $HYPEMAN_CMD run --name "$E2E_VM_NAME" alpine:latest 2>&1; then + IMAGE_READY=true + break + fi + sleep 2 +done +[ "$IMAGE_READY" = true ] || fail "hypeman run failed (image not ready after 60s)" pass "hypeman run works" # Wait for VM to be ready From 42f3dd5af8a83fd90280d5df61beabb61bcb5b62 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 12 Feb 2026 15:52:02 -0500 Subject: [PATCH 15/24] fix: clean up macOS development docs and Makefile comment - Remove "Alternative Commands" section (make dev covers it) - Remove known limitations that are implementation details or wrong: disk format is handled automatically, snapshots aren't supported, network ingress is internal, vz-shim is a subprocess not in-process - Keep disk format and snapshots as brief notes - Makefile: 'run' target comment says "for agents" not "for testing" Co-Authored-By: Claude Opus 4.6 --- DEVELOPMENT.md | 30 ++---------------------------- Makefile | 2 +- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 9063a907..d45e422c 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -374,19 +374,6 @@ The `make dev` command automatically detects macOS and: - Signs with required entitlements - Runs with hot reload (no sudo required) -### Alternative Commands - -```bash -# Build and sign only (no hot reload) -make sign-darwin - -# Verify entitlements are correct -make verify-entitlements - -# Run manually after signing -./bin/hypeman -``` - ### Key Differences from Linux Development | Aspect | Linux | macOS | @@ -457,22 +444,9 @@ Note: Full integration tests require Linux. On macOS, focus on unit tests and ma ### Known Limitations -1. **Disk Format**: vz only supports raw disk images (not qcow2). Convert images: - ```bash - qemu-img convert -f qcow2 -O raw disk.qcow2 disk.raw - ``` - -2. 
**Snapshots**: Only available on macOS 14+ (Sonoma) on Apple Silicon: - ```go - // Check support at runtime - valid, err := vmConfig.ValidateSaveRestoreSupport() - ``` - -3. **Network Ingress**: VMs get DHCP addresses from macOS NAT. To access a VM's services: - - Query the VM's IP via guest agent - - Use vsock for internal communication (no NAT traversal needed) +1. **Disk Format**: vz only supports raw disk images (not qcow2). The image pipeline handles conversion automatically. -4. **In-Process VMM**: Unlike CH/QEMU which run as separate processes, vz VMs run in the hypeman process. If hypeman crashes, all VMs stop. +2. **Snapshots**: Not currently supported on the vz hypervisor. ### Troubleshooting diff --git a/Makefile b/Makefile index b73bd05d..566125a1 100644 --- a/Makefile +++ b/Makefile @@ -347,7 +347,7 @@ dev-darwin: build-embedded $(AIR) @rm -f ./tmp/main PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" $(AIR) -c .air.darwin.toml -# Run without hot reload (for testing) +# Run without hot reload (for agents) run: @if [ "$$(uname)" = "Darwin" ]; then \ $(MAKE) run-darwin; \ From af20400e857257f6e50036f9dc8332f47aa9bec6 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 12 Feb 2026 15:59:01 -0500 Subject: [PATCH 16/24] fix: README treats Linux and macOS as equal platforms - Requirements: remove "Production"/"Experimental" labels - Quick Start: "Linux and macOS supported" - CLI section: reword for local-first usage, remove "remote" framing - Remove entire "macOS Support" section (platform details belong in DEVELOPMENT.md, not the user-facing README) Co-Authored-By: Claude Opus 4.6 --- README.md | 83 ++++++++----------------------------------------------- 1 file changed, 11 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 324102fe..93de5d51 100644 --- a/README.md +++ b/README.md @@ -21,27 +21,25 @@ ## Requirements -### Linux (Production) -Hypeman server runs on **Linux** with **KVM** virtualization support. Supports Cloud Hypervisor and QEMU as hypervisors. +### Linux +**KVM** virtualization support required. Supports Cloud Hypervisor and QEMU as hypervisors. -### macOS (Experimental) -Hypeman also supports **macOS** (11.0+) using Apple's **Virtualization.framework** via the `vz` hypervisor. See [macOS Support](#macos-support) below. - -The CLI can run locally on the server or connect remotely from any machine. +### macOS +**macOS 11.0+** on Apple Silicon. Uses Apple's Virtualization.framework via the `vz` hypervisor. ## Quick Start -Install Hypeman on your Linux server: +Install Hypeman (Linux and macOS supported): ```bash curl -fsSL https://get.hypeman.sh | bash ``` -This installs both the Hypeman server and CLI. The installer handles all dependencies, KVM access, and network configuration automatically. +This installs both the Hypeman server and CLI. The installer handles all dependencies and configuration automatically. -## CLI Installation (Remote Access) +## CLI Installation -To connect to a Hypeman server from another machine, install just the CLI: +To use Hypeman via the CLI on a separate machine: **Homebrew:** ```bash @@ -53,17 +51,11 @@ brew install kernel/tap/hypeman go install 'github.com/kernel/hypeman-cli/cmd/hypeman@latest' ``` -**Configure remote access:** - -1. On the server, generate an API token: -```bash -hypeman-token -``` +**Configure CLI access:** -2. 
On your local machine, set the environment variables: ```bash -export HYPEMAN_API_KEY="" -export HYPEMAN_BASE_URL="http://:8080" +export HYPEMAN_API_KEY="" +export HYPEMAN_BASE_URL="http://:8080" ``` ## Usage @@ -159,59 +151,6 @@ hypeman logs --source hypeman my-app For all available commands, run `hypeman --help`. -## macOS Support - -Hypeman supports macOS using Apple's Virtualization.framework through the `vz` hypervisor. This provides native virtualization on Apple Silicon Macs (Intel Macs are not supported). - -### Requirements - -- macOS 11.0+ (macOS 14.0+ required for snapshot/restore on ARM64) -- Apple Silicon (M1/M2/M3) recommended -- Caddy: `brew install caddy` -- e2fsprogs: `brew install e2fsprogs` (for ext4 disk images) - -### Quick Start (macOS) - -```bash -# Install dependencies -brew install caddy e2fsprogs - -# Add e2fsprogs to PATH (it's keg-only) -export PATH="/opt/homebrew/opt/e2fsprogs/bin:/opt/homebrew/opt/e2fsprogs/sbin:$PATH" - -# Configure environment -cp .env.darwin.example .env - -# Create data directory -mkdir -p ~/Library/Application\ Support/hypeman - -# Run with hot reload (auto-detects macOS, builds, signs, and runs) -make dev -``` - -The `make dev` command automatically detects macOS and handles building with vz support and signing with required entitlements. - -### Key Differences from Linux - -| Feature | Linux | macOS | -|---------|-------|-------| -| Hypervisors | Cloud Hypervisor, QEMU | vz (Virtualization.framework) | -| Networking | TAP devices, bridges, iptables | NAT (built-in, automatic) | -| Rate Limiting | HTB/tc | Not supported | -| GPU Passthrough | VFIO | Not supported | -| Disk Format | qcow2, raw | raw only | -| Snapshots | Always available | macOS 14+ ARM64 only | - -### Limitations - -- **Networking**: macOS uses NAT networking automatically. No manual bridge/TAP configuration needed, but ingress requires discovering the VM's NAT IP. -- **Rate Limiting**: Network and disk I/O rate limiting is not available on macOS. -- **GPU**: PCI device passthrough is not supported on macOS. -- **Disk Images**: qcow2 format is not directly supported; use raw disk images. -- **Snapshots**: Requires macOS 14.0+ on Apple Silicon (ARM64). - -For detailed development setup, see [DEVELOPMENT.md](DEVELOPMENT.md). - ## Development See [DEVELOPMENT.md](DEVELOPMENT.md) for build instructions, configuration options, and contributing guidelines. 
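A note on the two variables the README now centers on: any HTTP client
can drive the server with them, not just the CLI. A minimal Go sketch
(the Bearer auth scheme and the /instances path are illustrative
assumptions, not confirmed by this patch):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	base := os.Getenv("HYPEMAN_BASE_URL") // e.g. http://localhost:8080
	req, err := http.NewRequest("GET", base+"/instances", nil) // hypothetical endpoint
	if err != nil {
		panic(err)
	}
	// Assumption: the API accepts the token as a standard bearer credential.
	req.Header.Set("Authorization", "Bearer "+os.Getenv("HYPEMAN_API_KEY"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}
```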
From 5c900369d8bb61881cf18d09642f1de184ed7736 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 14:32:20 -0500 Subject: [PATCH 17/24] Address PR review: fix shutdown semantics, reduce timeout, add vz integration tests Changes based on PR review feedback: - Reduce vz HTTP client timeout from 30s to 10s (local Unix socket) - Add comment on 2GB memory safety default in vz-shim - Fix graceful shutdown to only send ACPI power button without immediate force-kill fallback, aligning with CH/QEMU semantics - Add macOS vz integration tests (TestVZBasicLifecycle, TestVZExecAndShutdown) Test infrastructure improvements: - Use short /tmp/ paths for vz test temp dirs to avoid macOS 104-byte Unix socket path limit (t.TempDir() paths are too long) - Capture vz-shim stderr and log file contents in error messages for better diagnostics when shim fails to start Co-Authored-By: Claude Opus 4.6 --- cmd/vz-shim/main.go | 2 + cmd/vz-shim/server.go | 17 +- cmd/vz-shim/vm.go | 2 +- lib/hypervisor/vz/client.go | 2 +- lib/hypervisor/vz/starter.go | 37 ++- lib/instances/manager_darwin_test.go | 328 +++++++++++++++++++++++++++ 6 files changed, 372 insertions(+), 16 deletions(-) create mode 100644 lib/instances/manager_darwin_test.go diff --git a/cmd/vz-shim/main.go b/cmd/vz-shim/main.go index f7ce1907..1a1c887f 100644 --- a/cmd/vz-shim/main.go +++ b/cmd/vz-shim/main.go @@ -49,11 +49,13 @@ func main() { vm, vmConfig, err := createVM(config) if err != nil { slog.Error("failed to create VM", "error", err) + fmt.Fprintf(os.Stderr, "failed to create VM: %v\n", err) os.Exit(1) } if err := vm.Start(); err != nil { slog.Error("failed to start VM", "error", err) + fmt.Fprintf(os.Stderr, "failed to start VM: %v\n", err) os.Exit(1) } slog.Info("VM started", "vcpus", config.VCPUs, "memory_mb", config.MemoryBytes/1024/1024) diff --git a/cmd/vz-shim/server.go b/cmd/vz-shim/server.go index 38c29a40..fe5ab603 100644 --- a/cmd/vz-shim/server.go +++ b/cmd/vz-shim/server.go @@ -104,20 +104,17 @@ func (s *ShimServer) handleShutdown(w http.ResponseWriter, r *http.Request) { s.mu.Lock() defer s.mu.Unlock() - // Request graceful shutdown via guest + // Send ACPI power button (graceful shutdown signal to guest). + // The caller (instance manager) handles timeout/force-kill if the guest + // doesn't shut down in time. Force-kill is in handleVMMShutdown / killHypervisor. 
success, err := s.vm.RequestStop() if err != nil || !success { - slog.Warn("RequestStop failed, trying Stop", "error", err) - if s.vm.CanStop() { - if err := s.vm.Stop(); err != nil { - slog.Error("failed to stop VM", "error", err) - http.Error(w, fmt.Sprintf("shutdown failed: %v", err), http.StatusInternalServerError) - return - } - } + slog.Error("RequestStop failed", "error", err, "success", success) + http.Error(w, fmt.Sprintf("shutdown failed: %v", err), http.StatusInternalServerError) + return } - slog.Info("VM shutdown requested") + slog.Info("VM graceful shutdown requested (ACPI power button)") w.WriteHeader(http.StatusNoContent) } diff --git a/cmd/vz-shim/vm.go b/cmd/vz-shim/vm.go index 2c6806ac..afaedadf 100644 --- a/cmd/vz-shim/vm.go +++ b/cmd/vz-shim/vm.go @@ -259,7 +259,7 @@ func computeCPUCount(requested int) uint { func computeMemorySize(requested uint64) uint64 { if requested == 0 { - requested = 2 * 1024 * 1024 * 1024 // 2GB default + requested = 2 * 1024 * 1024 * 1024 // 2GB safety default (caller normally provides this) } maxAllowed := vz.VirtualMachineConfigurationMaximumAllowedMemorySize() diff --git a/lib/hypervisor/vz/client.go b/lib/hypervisor/vz/client.go index 50abbbe3..5f9ec0d1 100644 --- a/lib/hypervisor/vz/client.go +++ b/lib/hypervisor/vz/client.go @@ -29,7 +29,7 @@ func NewClient(socketPath string) (*Client, error) { } httpClient := &http.Client{ Transport: transport, - Timeout: 30 * time.Second, + Timeout: 10 * time.Second, } // Verify connectivity with a short timeout diff --git a/lib/hypervisor/vz/starter.go b/lib/hypervisor/vz/starter.go index 90a7b5e6..e80260b0 100644 --- a/lib/hypervisor/vz/starter.go +++ b/lib/hypervisor/vz/starter.go @@ -5,6 +5,7 @@ package vz import ( + "bytes" "context" "encoding/json" "fmt" @@ -157,9 +158,10 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s return 0, nil, fmt.Errorf("get vz-shim binary: %w", err) } + var shimStderr bytes.Buffer cmd := exec.Command(shimBinary, "-config", string(configJSON)) cmd.Stdout = nil - cmd.Stderr = nil + cmd.Stderr = &shimStderr cmd.Stdin = nil cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, @@ -172,14 +174,41 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s pid := cmd.Process.Pid log.InfoContext(ctx, "vz-shim started", "pid", pid, "control_socket", controlSocket) + // Wait for shim in a goroutine so we can detect early exit + waitDone := make(chan error, 1) + go func() { waitDone <- cmd.Wait() }() + client, err := s.waitForShim(ctx, controlSocket, 30*time.Second) if err != nil { - cmd.Process.Kill() + // Read shim log file for diagnostics (before instance dir cleanup deletes it) + shimLog := "" + if logData, readErr := os.ReadFile(logPath); readErr == nil && len(logData) > 0 { + shimLog = string(logData) + } + + // Check if shim already exited (crashed during startup) + select { + case waitErr := <-waitDone: + stderr := shimStderr.String() + details := "" + if stderr != "" { + details += fmt.Sprintf(" (stderr: %s)", stderr) + } + if shimLog != "" { + details += fmt.Sprintf(" (shim log: %s)", shimLog) + } + return 0, nil, fmt.Errorf("vz-shim exited early: %v%s", waitErr, details) + default: + // Shim still running but socket not available + cmd.Process.Kill() + <-waitDone + } + if shimLog != "" { + return 0, nil, fmt.Errorf("connect to vz-shim: %w (shim log: %s)", err, shimLog) + } return 0, nil, fmt.Errorf("connect to vz-shim: %w", err) } - go cmd.Wait() - return pid, client, nil } diff --git 
a/lib/instances/manager_darwin_test.go b/lib/instances/manager_darwin_test.go new file mode 100644 index 00000000..02e6f53a --- /dev/null +++ b/lib/instances/manager_darwin_test.go @@ -0,0 +1,328 @@ +//go:build darwin + +package instances + +import ( + "bytes" + "context" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "syscall" + "testing" + "time" + + "github.com/kernel/hypeman/cmd/api/config" + "github.com/kernel/hypeman/lib/devices" + "github.com/kernel/hypeman/lib/guest" + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/kernel/hypeman/lib/images" + "github.com/kernel/hypeman/lib/network" + "github.com/kernel/hypeman/lib/paths" + "github.com/kernel/hypeman/lib/resources" + "github.com/kernel/hypeman/lib/system" + "github.com/kernel/hypeman/lib/volumes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// setupVZTestManager creates a test manager with a short temp directory path. +// macOS has a 104-byte limit on Unix socket paths, and t.TempDir() creates paths +// under /var/folders/... which are too long for the nested socket paths used by vz-shim. +func setupVZTestManager(t *testing.T) (*manager, string) { + tmpDir, err := os.MkdirTemp("/tmp", "vz-") + require.NoError(t, err) + t.Cleanup(func() { os.RemoveAll(tmpDir) }) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + p := paths.New(tmpDir) + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemManager := system.NewManager(p) + networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) + volumeManager := volumes.NewManager(p, 0, nil) + limits := ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 0, + MaxMemoryPerInstance: 0, + } + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, "", nil, nil).(*manager) + + resourceMgr := resources.NewManager(cfg, p) + resourceMgr.SetInstanceLister(mgr) + resourceMgr.SetImageLister(imageManager) + resourceMgr.SetVolumeLister(volumeManager) + err = resourceMgr.Initialize(context.Background()) + require.NoError(t, err) + mgr.SetResourceValidator(resourceMgr) + + t.Cleanup(func() { + cleanupOrphanedProcesses(t, mgr) + }) + + return mgr, tmpDir +} + +// vzExecCommand runs a command in the guest via vsock exec. +func vzExecCommand(ctx context.Context, inst *Instance, command ...string) (string, int, error) { + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return "", -1, err + } + + var stdout, stderr bytes.Buffer + exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ + Command: command, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + if err != nil { + return stderr.String(), -1, err + } + + output := stdout.String() + if stderr.Len() > 0 { + output += "\nSTDERR: " + stderr.String() + } + return output, exit.Code, nil +} + +// TestVZBasicLifecycle tests the full vz instance lifecycle: create, exec, stop, start, delete. 
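+// It exercises the public manager API end to end against a real vz-shim:
+// the alpine image is pulled from Docker Hub, the guest boots with
+// networking disabled, and exec traffic flows over vsock.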
+func TestVZBasicLifecycle(t *testing.T) { + if runtime.GOOS != "darwin" { + t.Skip("vz tests require macOS") + } + + mgr, tmpDir := setupVZTestManager(t) + ctx := context.Background() + p := paths.New(tmpDir) + + // Prepare image + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + t.Log("Pulling alpine:latest image...") + alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/alpine:latest", + }) + require.NoError(t, err) + + imageName := alpineImage.Name + for i := 0; i < 60; i++ { + img, err := imageManager.GetImage(ctx, imageName) + if err == nil && img.Status == images.StatusReady { + alpineImage = img + break + } + if err == nil && img.Status == images.StatusFailed { + t.Fatalf("Image build failed: %s", *img.Error) + } + time.Sleep(1 * time.Second) + } + require.Equal(t, images.StatusReady, alpineImage.Status, "Image should be ready") + t.Log("Alpine image ready") + + // Ensure system files (kernel + initrd) + systemManager := system.NewManager(p) + err = systemManager.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Create instance using vz hypervisor + inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ + Name: "test-vz-lifecycle", + Image: "docker.io/library/alpine:latest", + Size: 2 * 1024 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Hypervisor: hypervisor.TypeVZ, + Env: map[string]string{"TEST_VAR": "hello"}, + }) + if err != nil { + dumpVZShimLogs(t, tmpDir) + require.NoError(t, err) + } + require.NotNil(t, inst) + assert.Equal(t, StateRunning, inst.State) + assert.Equal(t, hypervisor.TypeVZ, inst.HypervisorType) + t.Logf("Instance created: %s (hypervisor: %s)", inst.Id, inst.HypervisorType) + + t.Cleanup(func() { + t.Log("Cleaning up instance...") + mgr.DeleteInstance(ctx, inst.Id) + }) + + // Wait for guest agent to be ready + err = waitForExecAgent(ctx, mgr, inst.Id, 30*time.Second) + require.NoError(t, err, "guest agent should be ready") + t.Log("Guest agent ready") + + // Exec test: echo hello + output, exitCode, err := vzExecCommand(ctx, inst, "echo", "hello") + require.NoError(t, err, "exec should succeed") + require.Equal(t, 0, exitCode) + assert.Equal(t, "hello", strings.TrimSpace(output)) + t.Log("Exec test passed") + + // Graceful shutdown test + t.Log("Stopping instance (graceful shutdown)...") + inst, err = mgr.StopInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStopped, inst.State) + t.Log("Instance stopped") + + // Verify hypervisor process is gone + if inst.HypervisorPID != nil { + time.Sleep(500 * time.Millisecond) + err := checkProcessGone(*inst.HypervisorPID) + assert.NoError(t, err, "hypervisor process should be gone after stop") + } + + // Delete test + t.Log("Deleting instance...") + err = mgr.DeleteInstance(ctx, inst.Id) + require.NoError(t, err) + + assert.NoDirExists(t, p.InstanceDir(inst.Id)) + _, err = mgr.GetInstance(ctx, inst.Id) + assert.ErrorIs(t, err, ErrNotFound) + t.Log("Instance deleted and cleaned up") +} + +// TestVZExecAndShutdown focuses on exec behavior and graceful shutdown. 
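+// Beyond the happy path, it checks the failure mode that matters most for
+// exec plumbing: a nonexistent command must fail within a bounded time
+// (under five seconds here) instead of hanging the vsock stream.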
+func TestVZExecAndShutdown(t *testing.T) { + if runtime.GOOS != "darwin" { + t.Skip("vz tests require macOS") + } + + mgr, tmpDir := setupVZTestManager(t) + ctx := context.Background() + p := paths.New(tmpDir) + + // Prepare image + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + t.Log("Pulling alpine:latest image...") + alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/alpine:latest", + }) + require.NoError(t, err) + + imageName := alpineImage.Name + for i := 0; i < 60; i++ { + img, err := imageManager.GetImage(ctx, imageName) + if err == nil && img.Status == images.StatusReady { + alpineImage = img + break + } + if err == nil && img.Status == images.StatusFailed { + t.Fatalf("Image build failed: %s", *img.Error) + } + time.Sleep(1 * time.Second) + } + require.Equal(t, images.StatusReady, alpineImage.Status, "Image should be ready") + + systemManager := system.NewManager(p) + err = systemManager.EnsureSystemFiles(ctx) + require.NoError(t, err) + + inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ + Name: "test-vz-exec", + Image: "docker.io/library/alpine:latest", + Size: 2 * 1024 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Hypervisor: hypervisor.TypeVZ, + }) + if err != nil { + dumpVZShimLogs(t, tmpDir) + require.NoError(t, err) + } + assert.Equal(t, StateRunning, inst.State) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + mgr.DeleteInstance(ctx, inst.Id) + }) + + err = waitForExecAgent(ctx, mgr, inst.Id, 30*time.Second) + require.NoError(t, err, "guest agent should be ready") + + // Test: echo hello + output, exitCode, err := vzExecCommand(ctx, inst, "echo", "hello") + require.NoError(t, err) + require.Equal(t, 0, exitCode) + assert.Equal(t, "hello", strings.TrimSpace(output)) + t.Log("echo test passed") + + // Test: nonexistent command should error, not hang + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + require.NoError(t, err) + + start := time.Now() + var stdout, stderr strings.Builder + _, err = guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ + Command: []string{"nonexistent_command_xyz"}, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + elapsed := time.Since(start) + require.Error(t, err, "exec should fail for nonexistent command") + require.Less(t, elapsed, 5*time.Second, "exec should not hang") + t.Logf("Nonexistent command failed correctly in %v", elapsed) + + // Graceful shutdown + t.Log("Stopping instance...") + inst, err = mgr.StopInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStopped, inst.State) + t.Log("Instance stopped gracefully") + + // Delete + err = mgr.DeleteInstance(ctx, inst.Id) + require.NoError(t, err) + _, err = mgr.GetInstance(ctx, inst.Id) + assert.ErrorIs(t, err, ErrNotFound) + t.Log("Instance deleted") +} + +// dumpVZShimLogs logs any vz-shim log files found under tmpDir for debugging CI failures. +func dumpVZShimLogs(t *testing.T, tmpDir string) { + t.Helper() + logFiles, _ := filepath.Glob(filepath.Join(tmpDir, "guests", "*", "logs", "vz-shim.log")) + for _, logFile := range logFiles { + content, err := os.ReadFile(logFile) + if err == nil && len(content) > 0 { + t.Logf("vz-shim log (%s):\n%s", logFile, string(content)) + } + } +} + +// checkProcessGone verifies a process no longer exists. 
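+// It uses the POSIX kill(pid, 0) convention via proc.Signal: signal 0
+// delivers nothing and only checks that the PID exists and is signalable,
+// so a nil error means the process still exists, while an error means it
+// no longer does.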
+func checkProcessGone(pid int) error { + proc, err := os.FindProcess(pid) + if err != nil { + return nil + } + err = proc.Signal(syscall.Signal(0)) + if err != nil { + return nil // Process doesn't exist + } + return fmt.Errorf("process %d still running", pid) +} From adbc938b8e645b12fb0558601ce70534f2bdb6d5 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 15:14:26 -0500 Subject: [PATCH 18/24] fix(vz): fix guest-agent exec format error on instance restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a force-kill (vm.Stop), the overlay filesystem could have a corrupted guest-agent binary. The lazy copy optimization skipped re-copying the binary if it already existed, causing exec format error on restart. Always copy from initrd to ensure correctness. Also adds restart coverage to TestVZBasicLifecycle (stop → start → exec → verify) with diagnostic log dumping on failure. Co-Authored-By: Claude Opus 4.6 --- lib/instances/manager_darwin_test.go | 69 +++++++++++++++++++++++++++- lib/system/init/mount.go | 13 ++---- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/lib/instances/manager_darwin_test.go b/lib/instances/manager_darwin_test.go index 02e6f53a..addffc1a 100644 --- a/lib/instances/manager_darwin_test.go +++ b/lib/instances/manager_darwin_test.go @@ -185,12 +185,77 @@ func TestVZBasicLifecycle(t *testing.T) { t.Log("Instance stopped") // Verify hypervisor process is gone - if inst.HypervisorPID != nil { + oldPID := inst.HypervisorPID + if oldPID != nil { time.Sleep(500 * time.Millisecond) - err := checkProcessGone(*inst.HypervisorPID) + err := checkProcessGone(*oldPID) assert.NoError(t, err, "hypervisor process should be gone after stop") } + // Restart test + t.Log("Starting instance (restart after stop)...") + inst, err = mgr.StartInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateRunning, inst.State) + t.Logf("Instance restarted: %s (pid: %v)", inst.Id, inst.HypervisorPID) + + // Re-read instance to get updated vsock info + inst, err = mgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Wait for exec to actually work after restart + // (can't rely on waitForExecAgent - logs from first boot still contain the marker) + t.Log("Waiting for exec to work after restart...") + var execErr error + for i := 0; i < 30; i++ { + time.Sleep(1 * time.Second) + // Re-read instance each time in case vsock info updates + inst, err = mgr.GetInstance(ctx, inst.Id) + if err != nil { + continue + } + output, exitCode, execErr = vzExecCommand(ctx, inst, "echo", "after-restart") + if execErr == nil && exitCode == 0 { + break + } + t.Logf("Exec attempt %d: err=%v", i+1, execErr) + } + if execErr != nil { + dumpVZShimLogs(t, tmpDir) + // Dump ALL log files + allLogs, _ := filepath.Glob(filepath.Join(tmpDir, "guests", "*", "logs", "*")) + for _, logFile := range allLogs { + content, err := os.ReadFile(logFile) + if err == nil && len(content) > 0 { + if len(content) > 4000 { + content = content[len(content)-4000:] + } + t.Logf("log file (%s):\n%s", logFile, string(content)) + } else if err == nil { + t.Logf("log file (%s): EMPTY", logFile) + } + } + // Check if vz-shim is still running + if inst.HypervisorPID != nil { + err := checkProcessGone(*inst.HypervisorPID) + if err != nil { + t.Logf("vz-shim process %d is still running", *inst.HypervisorPID) + } else { + t.Logf("vz-shim process %d is GONE (crashed?)", *inst.HypervisorPID) + } + } + } + require.NoError(t, execErr, "exec should succeed 
after restart") + require.Equal(t, 0, exitCode) + assert.Equal(t, "after-restart", strings.TrimSpace(output)) + t.Log("Exec after restart passed") + + // Stop again before delete + t.Log("Stopping instance before delete...") + inst, err = mgr.StopInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStopped, inst.State) + // Delete test t.Log("Deleting instance...") err = mgr.DeleteInstance(ctx, inst.Id) diff --git a/lib/system/init/mount.go b/lib/system/init/mount.go index 9259cc77..faf65d4d 100644 --- a/lib/system/init/mount.go +++ b/lib/system/init/mount.go @@ -224,9 +224,8 @@ func redirectToConsole(device string) { } // copyGuestAgent copies the guest-agent binary to the target location in the new root. -// It skips copying if: -// - skipGuestAgent config option is true -// - The destination file already exists (lazy copy optimization) +// Always copies from initrd to ensure the binary matches the current version and is not +// corrupted (e.g., after a force-kill that left the overlay in a dirty state). func copyGuestAgent(log *Logger, skipGuestAgent bool) error { const ( src = "/usr/local/bin/guest-agent" @@ -239,12 +238,6 @@ func copyGuestAgent(log *Logger, skipGuestAgent bool) error { return nil } - // Check if destination already exists (lazy copy - skip if already present) - if _, err := os.Stat(dst); err == nil { - log.Info("hypeman-init:agent", "guest-agent already exists, skipping copy") - return nil - } - // Create target directory if err := os.MkdirAll("/overlay/newroot/opt/hypeman", 0755); err != nil { return fmt.Errorf("mkdir: %w", err) @@ -256,7 +249,7 @@ func copyGuestAgent(log *Logger, skipGuestAgent bool) error { return fmt.Errorf("read source: %w", err) } - // Write to destination + // Write to destination (always overwrite to ensure correct binary after restarts) if err := os.WriteFile(dst, data, 0755); err != nil { return fmt.Errorf("write destination: %w", err) } From c58ed0a79ccb88b28f24cfd8c3d7ece1183290d8 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 15:26:03 -0500 Subject: [PATCH 19/24] Address hiroTamada review nits - Warn on codesign failure instead of silently swallowing (install.sh) - Fix vz control interface description: HTTP, not gRPC (README.md) - Remove dead if/else that set same path on both branches (e2e-install-test.sh) Co-Authored-By: Claude Opus 4.6 --- lib/hypervisor/README.md | 2 +- scripts/e2e-install-test.sh | 6 +----- scripts/install.sh | 4 +++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/hypervisor/README.md b/lib/hypervisor/README.md index 11ceccd1..22e6dc3c 100644 --- a/lib/hypervisor/README.md +++ b/lib/hypervisor/README.md @@ -17,7 +17,7 @@ Hypeman originally supported only Cloud Hypervisor. 
This abstraction layer allow |------------|----------|---------------|-------------------| | Cloud Hypervisor | Linux | External process | HTTP API over Unix socket | | QEMU | Linux | External process | QMP over Unix socket | -| vz | macOS | Subprocess (vz-shim) | gRPC over Unix socket | +| vz | macOS | Subprocess (vz-shim) | HTTP API over Unix socket | ## How It Works diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index 6dcd9002..2aa472d6 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -140,11 +140,7 @@ export HYPEMAN_API_KEY="$API_KEY" export HYPEMAN_BASE_URL="http://localhost:${PORT:-8080}" # Determine CLI path -if [ "$OS" = "darwin" ]; then - HYPEMAN_CMD="/usr/local/bin/hypeman" -else - HYPEMAN_CMD="/usr/local/bin/hypeman" -fi +HYPEMAN_CMD="/usr/local/bin/hypeman" # Verify CLI was installed [ -x "$HYPEMAN_CMD" ] || fail "hypeman CLI not found at $HYPEMAN_CMD" diff --git a/scripts/install.sh b/scripts/install.sh index c1382e10..367b6db5 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -355,7 +355,9 @@ else ENTITLEMENTS - codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null || true + if ! codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null; then + warn "codesign failed — vz hypervisor will not work without virtualization entitlement" + fi rm -f "$ENTITLEMENTS_TMP" fi fi From 6e954ff0e18afb8edb2bb72bd72c7abcc3e90f96 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 03:04:00 +0000 Subject: [PATCH 20/24] fix: respect NetworkEnabled=false in vz shim by not creating NIC when networks is empty When NetworkEnabled=false, the instance's Networks slice is intentionally empty. The vz shim was incorrectly treating an empty networks slice as 'add default NAT NIC', which gave the guest network access even when the caller explicitly disabled networking. Now, when networks is empty, configureNetwork returns immediately without attaching any NIC, matching the behavior of QEMU and Cloud Hypervisor. Applied via @cursor push command --- cmd/vz-shim/vm.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cmd/vz-shim/vm.go b/cmd/vz-shim/vm.go index afaedadf..b995f0c7 100644 --- a/cmd/vz-shim/vm.go +++ b/cmd/vz-shim/vm.go @@ -129,21 +129,18 @@ func configureSerialConsole(vmConfig *vz.VirtualMachineConfiguration, logPath st } func configureNetwork(vmConfig *vz.VirtualMachineConfiguration, networks []shimconfig.NetworkConfig) error { - var devices []*vz.VirtioNetworkDeviceConfiguration if len(networks) == 0 { - dev, err := createNATNetworkDevice("") + // No networks configured (NetworkEnabled=false) — do not attach any NIC. + return nil + } + + var devices []*vz.VirtioNetworkDeviceConfiguration + for _, netConfig := range networks { + dev, err := createNATNetworkDevice(netConfig.MAC) if err != nil { return err } devices = append(devices, dev) - } else { - for _, netConfig := range networks { - dev, err := createNATNetworkDevice(netConfig.MAC) - if err != nil { - return err - } - devices = append(devices, dev) - } } vmConfig.SetNetworkDevicesVirtualMachineConfiguration(devices) return nil From cc8413bab5760d7c074af83ab58792f45013df14 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 22:08:43 -0500 Subject: [PATCH 21/24] Fix StartInstance call to match new signature StartInstance now takes a StartInstanceRequest parameter (from PR #99). 
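For reference, the call-site change in the test (StartInstanceRequest's
fields are not used here; the zero value is assumed to mean "start with
the instance's existing configuration"):

    inst, err = mgr.StartInstance(ctx, inst.Id, StartInstanceRequest{})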
Co-Authored-By: Claude Opus 4.6 --- lib/instances/manager_darwin_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/instances/manager_darwin_test.go b/lib/instances/manager_darwin_test.go index addffc1a..27bb1ad0 100644 --- a/lib/instances/manager_darwin_test.go +++ b/lib/instances/manager_darwin_test.go @@ -194,7 +194,7 @@ func TestVZBasicLifecycle(t *testing.T) { // Restart test t.Log("Starting instance (restart after stop)...") - inst, err = mgr.StartInstance(ctx, inst.Id) + inst, err = mgr.StartInstance(ctx, inst.Id, StartInstanceRequest{}) require.NoError(t, err) assert.Equal(t, StateRunning, inst.State) t.Logf("Instance restarted: %s (pid: %v)", inst.Id, inst.HypervisorPID) From 34596cd0714495b854b4f32d1b402888b68dfd3b Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 22:16:36 -0500 Subject: [PATCH 22/24] Fix vz tests: keep VM alive with sleep infinity After PR #99, init does reboot(POWER_OFF) when the entrypoint exits. Alpine's default entrypoint (/bin/sh) exits immediately with no stdin, killing the VM before exec tests can run. Add Cmd: sleep infinity to keep the VM alive, matching the pattern in volumes_test.go. Co-Authored-By: Claude Opus 4.6 --- lib/instances/manager_darwin_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/instances/manager_darwin_test.go b/lib/instances/manager_darwin_test.go index 27bb1ad0..9fde885a 100644 --- a/lib/instances/manager_darwin_test.go +++ b/lib/instances/manager_darwin_test.go @@ -149,6 +149,7 @@ func TestVZBasicLifecycle(t *testing.T) { Vcpus: 1, NetworkEnabled: false, Hypervisor: hypervisor.TypeVZ, + Cmd: []string{"sleep", "infinity"}, Env: map[string]string{"TEST_VAR": "hello"}, }) if err != nil { @@ -313,6 +314,7 @@ func TestVZExecAndShutdown(t *testing.T) { Vcpus: 1, NetworkEnabled: false, Hypervisor: hypervisor.TypeVZ, + Cmd: []string{"sleep", "infinity"}, }) if err != nil { dumpVZShimLogs(t, tmpDir) From 4ed925a62e703a8e37222fb7b1402ae866e57991 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 22:31:24 -0500 Subject: [PATCH 23/24] Fix e2e test: use nginx:alpine instead of alpine:latest After PR #99, init does reboot(POWER_OFF) when the entrypoint exits. Alpine's /bin/sh exits immediately with no stdin, killing the VM before exec can run. nginx:alpine has a long-running daemon entrypoint that keeps the VM alive, matching the pattern in exec_test.go. 
Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index 2aa472d6..157803fd 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -152,13 +152,13 @@ pass "hypeman ps works" # VM lifecycle test E2E_VM_NAME="e2e-test-vm" -$HYPEMAN_CMD pull alpine:latest || fail "hypeman pull failed" +$HYPEMAN_CMD pull nginx:alpine || fail "hypeman pull failed" pass "hypeman pull works" # Wait for image to be available (pull is async) IMAGE_READY=false for i in $(seq 1 30); do - if $HYPEMAN_CMD run --name "$E2E_VM_NAME" alpine:latest 2>&1; then + if $HYPEMAN_CMD run --name "$E2E_VM_NAME" nginx:alpine 2>&1; then IMAGE_READY=true break fi From 07550edb24633b9b79b838fa6519c283a45460c8 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 13 Feb 2026 22:37:15 -0500 Subject: [PATCH 24/24] Fix e2e clean slate: remove data dir from previous failed runs Phase 1 was calling uninstall.sh without KEEP_DATA=false, so the data directory (including stale VMs from previous failed runs) persisted. This caused name_conflict errors when the test tried to create e2e-test-vm again. Co-Authored-By: Claude Opus 4.6 --- scripts/e2e-install-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh index 157803fd..cf7f1324 100755 --- a/scripts/e2e-install-test.sh +++ b/scripts/e2e-install-test.sh @@ -30,7 +30,7 @@ cd "$REPO_DIR" # Phase 1: Clean slate # ============================================================================= info "Phase 1: Cleaning previous installation..." -bash scripts/uninstall.sh 2>/dev/null || true +KEEP_DATA=false bash scripts/uninstall.sh 2>/dev/null || true # ============================================================================= # Phase 2: Install from source