diff --git a/.air.darwin.toml b/.air.darwin.toml new file mode 100644 index 00000000..9eb00963 --- /dev/null +++ b/.air.darwin.toml @@ -0,0 +1,48 @@ +root = "." +testdata_dir = "testdata" +tmp_dir = "tmp" + +[build] + args_bin = [] + bin = "./tmp/main" + # Build for macOS with vz support, then sign with entitlements + # Also builds and signs vz-shim (subprocess that hosts vz VMs) + cmd = "make build-embedded && go build -o ./tmp/vz-shim ./cmd/vz-shim && codesign --sign - --entitlements vz.entitlements --force ./tmp/vz-shim && mkdir -p lib/hypervisor/vz/vz-shim && cp ./tmp/vz-shim lib/hypervisor/vz/vz-shim/vz-shim && go build -tags containers_image_openpgp -o ./tmp/main ./cmd/api && codesign --sign - --entitlements vz.entitlements --force ./tmp/main" + delay = 1000 + exclude_dir = ["assets", "tmp", "vendor", "testdata", "bin", "scripts", "data", "kernel"] + exclude_file = [] + exclude_regex = ["_test.go"] + exclude_unchanged = false + follow_symlink = false + # No sudo needed on macOS - vz doesn't require root + full_bin = "./tmp/main" + include_dir = [] + include_ext = ["go", "tpl", "tmpl", "html", "yaml"] + include_file = [] + log = "build-errors.log" + poll = false + poll_interval = 0 + post_cmd = [] + kill_delay = '1s' + rerun = false + rerun_delay = 500 + send_interrupt = true + stop_on_error = false + +[color] + app = "" + build = "yellow" + main = "magenta" + runner = "green" + watcher = "cyan" + +[log] + main_only = false + time = false + +[misc] + clean_on_exit = false + +[screen] + clear_on_rebuild = false + keep_scroll = true diff --git a/.env.darwin.example b/.env.darwin.example new file mode 100644 index 00000000..f714f06e --- /dev/null +++ b/.env.darwin.example @@ -0,0 +1,122 @@ +# ============================================================================= +# macOS (Darwin) Configuration for Hypeman +# ============================================================================= +# Copy this file to .env and customize for your environment. +# +# Key differences from Linux (.env.example): +# - DEFAULT_HYPERVISOR: Use "vz" (Virtualization.framework) instead of cloud-hypervisor/qemu +# - DATA_DIR: Uses macOS conventions (~/Library/Application Support) +# - Network settings: BRIDGE_NAME, SUBNET_CIDR, etc. 
are IGNORED (vz uses NAT) +# - Rate limiting: Not supported on macOS (no tc/HTB equivalent) +# - GPU passthrough: Not supported on macOS +# ============================================================================= + +# Required +JWT_SECRET=dev-secret-change-me + +# Data directory - use macOS conventions +# Note: ~ expands to $HOME at runtime +DATA_DIR=~/Library/Application Support/hypeman + +# Server configuration +PORT=8080 + +# Logging +LOG_LEVEL=debug + +# ============================================================================= +# Hypervisor Configuration (IMPORTANT FOR MACOS) +# ============================================================================= +# On macOS, use "vz" (Virtualization.framework) +# - "cloud-hypervisor" and "qemu" are NOT supported on macOS +DEFAULT_HYPERVISOR=vz + +# ============================================================================= +# Network Configuration (DIFFERENT ON MACOS) +# ============================================================================= +# On macOS with vz, network is handled automatically via NAT: +# - VMs get IP addresses from 192.168.64.0/24 via DHCP +# - No TAP devices, bridges, or iptables needed +# - The following settings are IGNORED on macOS: +# BRIDGE_NAME, SUBNET_CIDR, SUBNET_GATEWAY, UPLINK_INTERFACE + +# DNS Server for VMs (used by guest for resolution) +DNS_SERVER=8.8.8.8 + +# ============================================================================= +# Caddy / Ingress Configuration +# ============================================================================= +CADDY_LISTEN_ADDRESS=0.0.0.0 +CADDY_ADMIN_ADDRESS=127.0.0.1 +CADDY_ADMIN_PORT=2019 +# Note: 5353 is used by mDNSResponder (Bonjour) on macOS, using 5354 instead +INTERNAL_DNS_PORT=5354 +CADDY_STOP_ON_SHUTDOWN=false + +# ============================================================================= +# Build System Configuration +# ============================================================================= +# For builds on macOS with vz, the registry URL needs to be accessible from +# NAT VMs. Since vz uses 192.168.64.0/24 for NAT, the host is at 192.168.64.1. +# +# IMPORTANT: "host.docker.internal" does NOT work in vz VMs - that's a Docker +# Desktop-specific hostname. Use the NAT gateway IP instead. 
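+# To confirm the gateway from inside a VM (a quick sketch using standard
+# Linux tooling; output will vary):
+#   ip route                        # default route should point at 192.168.64.1
+#   curl http://192.168.64.1:8080   # should reach the hypeman API on the host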
+# +# Registry URL (the host's hypeman API, accessible from VMs) +REGISTRY_URL=192.168.64.1:8080 +# Use HTTP (not HTTPS) since hypeman's internal registry uses plaintext +REGISTRY_INSECURE=true + +BUILDER_IMAGE=hypeman/builder:latest +MAX_CONCURRENT_SOURCE_BUILDS=2 +BUILD_TIMEOUT=600 + +# ============================================================================= +# Resource Limits (same as Linux) +# ============================================================================= +# Per-instance limits +MAX_VCPUS_PER_INSTANCE=4 +MAX_MEMORY_PER_INSTANCE=8GB + +# Aggregate limits (0 or empty = unlimited) +# MAX_TOTAL_VOLUME_STORAGE= + +# ============================================================================= +# OpenTelemetry (optional, same as Linux) +# ============================================================================= +# OTEL_ENABLED=false +# OTEL_ENDPOINT=127.0.0.1:4317 +# OTEL_SERVICE_NAME=hypeman +# OTEL_INSECURE=true +# ENV=dev + +# ============================================================================= +# TLS / ACME Configuration (same as Linux) +# ============================================================================= +# ACME_EMAIL=admin@example.com +# ACME_DNS_PROVIDER=cloudflare +# TLS_ALLOWED_DOMAINS=*.example.com +# CLOUDFLARE_API_TOKEN= + +# ============================================================================= +# macOS Limitations +# ============================================================================= +# The following features are NOT AVAILABLE on macOS: +# +# 1. GPU Passthrough (VFIO, mdev) +# - GPU_PROFILE_CACHE_TTL is ignored +# - Device registration/binding will fail +# +# 2. Network Rate Limiting +# - UPLOAD_BURST_MULTIPLIER, DOWNLOAD_BURST_MULTIPLIER are ignored +# - No tc/HTB equivalent on macOS +# +# 3. CPU/Memory Hotplug +# - Resize operations not supported +# +# 4. Disk I/O Limiting +# - DISK_IO_LIMIT, OVERSUB_DISK_IO are ignored +# +# 5. 
Snapshots (requires macOS 14+ on Apple Silicon) +# - SaveMachineStateToPath/RestoreMachineStateFromURL require macOS 14+ +# - Only supported on ARM64 (Apple Silicon) Macs diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ee4f8a75..9ff9862d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -55,3 +55,64 @@ jobs: TLS_TEST_DOMAIN: "test.hypeman-development.com" TLS_ALLOWED_DOMAINS: '*.hypeman-development.com' run: make test + + test-darwin: + runs-on: [self-hosted, macos, arm64] + concurrency: + group: macos-ci-test-${{ github.ref }} + cancel-in-progress: true + env: + DATA_DIR: /tmp/hypeman-ci-${{ github.run_id }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v6 + with: + go-version: '1.25' + cache: false + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - name: Install dependencies + run: | + brew list e2fsprogs &>/dev/null || brew install e2fsprogs + brew list erofs-utils &>/dev/null || brew install erofs-utils + go mod download + - name: Create run-scoped data directory + run: mkdir -p "$DATA_DIR" + - name: Generate OpenAPI code + run: make oapi-generate + - name: Build + run: make build + - name: Run tests + env: + DEFAULT_HYPERVISOR: vz + JWT_SECRET: ci-test-secret + run: make test + - name: Cleanup + if: always() + run: | + pkill -f "vz-shim.*$DATA_DIR" || true + rm -rf "$DATA_DIR" + make clean + + e2e-install: + runs-on: [self-hosted, macos, arm64] + needs: test-darwin + concurrency: + group: macos-ci-e2e-${{ github.ref }} + cancel-in-progress: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v6 + with: + go-version: '1.25' + cache: false + - name: Install dependencies + run: brew list caddy &>/dev/null || brew install caddy + - name: Run E2E install test + run: bash scripts/e2e-install-test.sh + - name: Cleanup on failure + if: failure() + run: bash scripts/uninstall.sh || true diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 85a14f8b..d45e422c 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -4,7 +4,17 @@ This document covers development setup, configuration, and contributing to Hypem ## Prerequisites -> **macOS Users:** Hypeman requires KVM, which is only available on Linux. See [scripts/utm/README.md](scripts/utm/README.md) for instructions on setting up a Linux VM with nested virtualization on Apple Silicon Macs. +### Linux (Default) + +**Go 1.25.4+**, **KVM**, **erofs-utils**, **dnsmasq** + +### macOS (Experimental) + +See [macOS Development](#macos-development) below for native macOS development using Virtualization.framework. + +--- + +**Linux Prerequisites:** **Go 1.25.4+**, **KVM**, **erofs-utils**, **dnsmasq** @@ -111,6 +121,7 @@ Hypeman can be configured using the following environment variables: | `DNS_PROPAGATION_TIMEOUT` | Max time to wait for DNS propagation (e.g., `2m`) | _(empty)_ | | `DNS_RESOLVERS` | Comma-separated DNS resolvers for propagation checking | _(empty)_ | | `CLOUDFLARE_API_TOKEN` | Cloudflare API token (when using `cloudflare` provider) | _(empty)_ | +| `DOCKER_SOCKET` | Path to Docker socket (for builder image builds) | `/var/run/docker.sock` | **Important: Subnet Configuration** @@ -244,6 +255,15 @@ make dev The server will start on port 8080 (configurable via `PORT` environment variable). +### Setting Up the Builder Image (for Dockerfile builds) + +The builder image is required for `hypeman build` to work. 
When `BUILDER_IMAGE` is unset or empty, the server will automatically build and push the builder image on startup using Docker. This is the easiest way to get started — just ensure Docker is available and run `make dev`. If a build is requested while the builder image is still being prepared, the server returns a clear error asking you to retry shortly. + +On macOS with Colima, set the Docker socket path: +```bash +DOCKER_SOCKET=$HOME/.colima/default/docker.sock +``` + ### Local OpenTelemetry (optional) To collect traces and metrics locally, run the Grafana LGTM stack (Loki, Grafana, Tempo, Mimir): @@ -314,3 +334,150 @@ Or generate everything at once: ```bash make generate-all ``` + +## macOS Development + +Hypeman supports native macOS development using Apple's Virtualization.framework (via the `vz` hypervisor). + +### Requirements + +- **macOS 11.0+** (Big Sur or later) +- **Apple Silicon** (M1/M2/M3) recommended +- **macOS 14.0+** (Sonoma) required for snapshot/restore (ARM64 only) +- **Go 1.25.4+** +- **Caddy** (for ingress): `brew install caddy` +- **e2fsprogs** (for ext4 disk images): `brew install e2fsprogs` + +### Quick Start + +```bash +# 1. Install dependencies +brew install caddy e2fsprogs + +# 2. Add e2fsprogs to PATH (it's keg-only) +export PATH="/opt/homebrew/opt/e2fsprogs/bin:/opt/homebrew/opt/e2fsprogs/sbin:$PATH" +# Add to ~/.zshrc for persistence + +# 3. Configure environment +cp .env.darwin.example .env +# Edit .env as needed (defaults work for local development) + +# 4. Create data directory +mkdir -p ~/Library/Application\ Support/hypeman + +# 5. Run in development mode (auto-detects macOS, builds, signs, and runs with hot reload) +make dev +``` + +The `make dev` command automatically detects macOS and: +- Builds with vz support +- Signs with required entitlements +- Runs with hot reload (no sudo required) + +### Key Differences from Linux Development + +| Aspect | Linux | macOS | +|--------|-------|-------| +| Hypervisor | Cloud Hypervisor, QEMU | vz (Virtualization.framework) | +| Binary signing | Not required | Automatic via `make dev` or `make sign-darwin` | +| Networking | TAP + bridge + iptables | Automatic NAT (no setup needed) | +| Root/sudo | Required for networking | Not required | +| Caddy | Embedded binary | Install via `brew install caddy` | +| DNS port | 5353 | 5354 (avoids mDNSResponder conflict) | + +### macOS-Specific Configuration + +The following environment variables work differently on macOS (see `.env.darwin.example`): + +| Variable | Linux | macOS | +|----------|-------|-------| +| `DEFAULT_HYPERVISOR` | `cloud-hypervisor` | `vz` | +| `DATA_DIR` | `/var/lib/hypeman` | `~/Library/Application Support/hypeman` | +| `INTERNAL_DNS_PORT` | `5353` | `5354` (5353 is used by mDNSResponder) | +| `BRIDGE_NAME` | Used | Ignored (NAT) | +| `SUBNET_CIDR` | Used | Ignored (NAT) | +| `UPLINK_INTERFACE` | Used | Ignored (NAT) | +| Network rate limiting | Supported | Not supported | +| GPU passthrough | Supported (VFIO) | Not supported | + +### Code Organization + +Platform-specific code uses Go build tags: + +``` +lib/network/ +├── bridge_linux.go # Linux networking (TAP, bridges, iptables) +├── bridge_darwin.go # macOS stubs (uses NAT) +└── ip.go # Shared utilities + +lib/devices/ +├── discovery_linux.go # Linux PCI device discovery +├── discovery_darwin.go # macOS stubs (no passthrough) +├── mdev_linux.go # Linux vGPU (mdev) +├── mdev_darwin.go # macOS stubs +├── vfio_linux.go # Linux VFIO binding +├── vfio_darwin.go # macOS stubs +└── types.go # Shared types + 
+lib/hypervisor/ +├── cloudhypervisor/ # Cloud Hypervisor (Linux) +├── qemu/ # QEMU (Linux, vsock_linux.go) +└── vz/ # Virtualization.framework (macOS only) + ├── starter.go # VMStarter implementation + ├── hypervisor.go # Hypervisor interface + └── vsock.go # VsockDialer via VirtioSocketDevice +``` + +### Testing on macOS + +```bash +# Verify vz package compiles correctly +make test-vz-compile + +# Run unit tests (Linux-specific tests like networking will be skipped) +go test ./lib/hypervisor/vz/... +go test ./lib/resources/... +go test ./lib/images/... +``` + +Note: Full integration tests require Linux. On macOS, focus on unit tests and manual API testing. + +### Known Limitations + +1. **Disk Format**: vz only supports raw disk images (not qcow2). The image pipeline handles conversion automatically. + +2. **Snapshots**: Not currently supported on the vz hypervisor. + +### Troubleshooting + +**"binary needs to be signed with entitlements"** +```bash +make sign-darwin +# Or just use: make dev (handles signing automatically) +``` + +**"caddy binary is not embedded on macOS"** +```bash +brew install caddy +``` + +**"address already in use" on port 5353** +- Port 5353 is used by mDNSResponder (Bonjour) on macOS +- Use port 5354 instead: `INTERNAL_DNS_PORT=5354` in `.env` +- The `.env.darwin.example` already has this configured correctly + +**"Virtualization.framework is not available"** +- Ensure you're on macOS 11.0+ +- Check if virtualization is enabled in Recovery Mode settings + +**"snapshot not supported"** +- Requires macOS 14.0+ on Apple Silicon +- Check: `sw_vers` and `uname -m` (should be arm64) + +**VM fails to start** +- Check serial log: `$DATA_DIR/instances//serial.log` +- Ensure kernel and initrd paths are correct in config + +**IOMMU/VFIO warnings at startup** +- These are expected on macOS and can be ignored +- GPU passthrough is not supported on macOS diff --git a/Makefile b/Makefile index 53e92d72..566125a1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build build-linux build-darwin test test-linux test-darwin install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded +.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build build-linux test test-linux test-darwin install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded # Directory where local binaries will be installed BIN_DIR ?= $(CURDIR)/bin @@ -198,15 +198,17 @@ endif build-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded | $(BIN_DIR) go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api -# Build for macOS (no CH/Caddy needed; guest binaries cross-compiled for Linux) -build-darwin: build-embedded | $(BIN_DIR) - go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api - # Build all binaries build-all: build # Run in development mode with hot reload -dev: dev-linux +# On macOS, redirects to dev-darwin which uses vz instead of cloud-hypervisor +dev: + @if [ "$$(uname)" = "Darwin" ]; then \ + $(MAKE) dev-darwin; \ + else \ + $(MAKE) dev-linux; \ + fi # Linux development mode with hot reload dev-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded $(AIR) @@ -238,7 +240,7 @@ test-linux: ensure-ch-binaries ensure-caddy-binaries 
build-embedded # Uses 'go list' to discover compilable packages, then filters out packages # whose test files reference Linux-only symbols (network, devices, system/init). DARWIN_EXCLUDE_PKGS := /lib/network|/lib/devices|/lib/system/init -test-darwin: build-embedded +test-darwin: build-embedded sign-vz-shim @VERBOSE_FLAG=""; \ if [ -n "$(VERBOSE)" ]; then VERBOSE_FLAG="-v"; fi; \ PKGS=$$(PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" \ @@ -282,3 +284,96 @@ clean: release-prep: download-ch-binaries build-caddy-binaries build-embedded go mod tidy +# ============================================================================= +# macOS (vz/Virtualization.framework) targets +# ============================================================================= + +# Entitlements file for macOS codesigning +ENTITLEMENTS_FILE ?= vz.entitlements + +# Build vz-shim (subprocess that hosts vz VMs) +# Also copies to embed directory so it gets embedded in the hypeman binary +.PHONY: build-vz-shim +build-vz-shim: | $(BIN_DIR) + @echo "Building vz-shim for macOS..." + go build -o $(BIN_DIR)/vz-shim ./cmd/vz-shim + mkdir -p lib/hypervisor/vz/vz-shim + cp $(BIN_DIR)/vz-shim lib/hypervisor/vz/vz-shim/vz-shim + @echo "Build complete: $(BIN_DIR)/vz-shim" + +# Sign vz-shim with entitlements +.PHONY: sign-vz-shim +sign-vz-shim: build-vz-shim + @echo "Signing $(BIN_DIR)/vz-shim with entitlements..." + codesign --sign - --entitlements $(ENTITLEMENTS_FILE) --force $(BIN_DIR)/vz-shim + @echo "Signed: $(BIN_DIR)/vz-shim" + +# Build for macOS with vz support +# Note: This builds without embedded CH/Caddy binaries since vz doesn't need them +# Guest-agent and init are cross-compiled for Linux (they run inside the VM) +.PHONY: build-darwin +build-darwin: build-embedded build-vz-shim | $(BIN_DIR) + @echo "Building hypeman for macOS with vz support..." + go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api + @echo "Build complete: $(BIN_DIR)/hypeman" + +# Sign the binary with entitlements (required for Virtualization.framework) +# Usage: make sign-darwin +.PHONY: sign-darwin +sign-darwin: build-darwin sign-vz-shim + @echo "Signing $(BIN_DIR)/hypeman with entitlements..." + codesign --sign - --entitlements $(ENTITLEMENTS_FILE) --force $(BIN_DIR)/hypeman + @echo "Verifying signature..." + codesign --display --entitlements - $(BIN_DIR)/hypeman + +# Sign with a specific identity (for distribution) +# Usage: make sign-darwin-identity IDENTITY="Developer ID Application: Your Name" +.PHONY: sign-darwin-identity +sign-darwin-identity: build-darwin + @if [ -z "$(IDENTITY)" ]; then \ + echo "Error: IDENTITY not set. Usage: make sign-darwin-identity IDENTITY='Developer ID Application: ...'"; \ + exit 1; \ + fi + @echo "Signing $(BIN_DIR)/hypeman with identity: $(IDENTITY)" + codesign --sign "$(IDENTITY)" --entitlements $(ENTITLEMENTS_FILE) --force --options runtime $(BIN_DIR)/hypeman + @echo "Verifying signature..." 
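+	@# Note: unlike 'sign-darwin', which uses the ad-hoc "-" identity (fine for
+	@# local development), this identity-based signature can be notarized.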
+ codesign --verify --verbose $(BIN_DIR)/hypeman + +# Run on macOS with vz support (development mode) +# Automatically signs the binary before running +.PHONY: dev-darwin +# macOS development mode with hot reload (uses vz, no sudo needed) +dev-darwin: build-embedded $(AIR) + @rm -f ./tmp/main + PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" $(AIR) -c .air.darwin.toml + +# Run without hot reload (for agents) +run: + @if [ "$$(uname)" = "Darwin" ]; then \ + $(MAKE) run-darwin; \ + else \ + $(MAKE) run-linux; \ + fi + +run-linux: ensure-ch-binaries ensure-caddy-binaries build-embedded build + ./bin/hypeman + +run-darwin: sign-darwin + PATH="/opt/homebrew/opt/e2fsprogs/sbin:$(PATH)" ./bin/hypeman + +# Quick test of vz package compilation +.PHONY: test-vz-compile +test-vz-compile: + @echo "Testing vz package compilation..." + go build ./lib/hypervisor/vz/... + @echo "vz package compiles successfully" + +# Verify entitlements on a signed binary +.PHONY: verify-entitlements +verify-entitlements: + @if [ ! -f $(BIN_DIR)/hypeman ]; then \ + echo "Error: $(BIN_DIR)/hypeman not found. Run 'make sign-darwin' first."; \ + exit 1; \ + fi + @echo "Entitlements on $(BIN_DIR)/hypeman:" + codesign --display --entitlements - $(BIN_DIR)/hypeman diff --git a/README.md b/README.md index 69a5f18f..93de5d51 100644 --- a/README.md +++ b/README.md @@ -21,21 +21,25 @@ ## Requirements -Hypeman server runs on **Linux** with **KVM** virtualization support. The CLI can run locally on the server or connect remotely from any machine. +### Linux +**KVM** virtualization support required. Supports Cloud Hypervisor and QEMU as hypervisors. + +### macOS +**macOS 11.0+** on Apple Silicon. Uses Apple's Virtualization.framework via the `vz` hypervisor. ## Quick Start -Install Hypeman on your Linux server: +Install Hypeman (Linux and macOS supported): ```bash curl -fsSL https://get.hypeman.sh | bash ``` -This installs both the Hypeman server and CLI. The installer handles all dependencies, KVM access, and network configuration automatically. +This installs both the Hypeman server and CLI. The installer handles all dependencies and configuration automatically. -## CLI Installation (Remote Access) +## CLI Installation -To connect to a Hypeman server from another machine, install just the CLI: +To use Hypeman via the CLI on a separate machine: **Homebrew:** ```bash @@ -47,17 +51,11 @@ brew install kernel/tap/hypeman go install 'github.com/kernel/hypeman-cli/cmd/hypeman@latest' ``` -**Configure remote access:** - -1. On the server, generate an API token: -```bash -hypeman-token -``` +**Configure CLI access:** -2. On your local machine, set the environment variables: ```bash -export HYPEMAN_API_KEY="" -export HYPEMAN_BASE_URL="http://:8080" +export HYPEMAN_API_KEY="" +export HYPEMAN_BASE_URL="http://:8080" ``` ## Usage diff --git a/cmd/api/api/cp.go b/cmd/api/api/cp.go index 3b060d39..6bae53ed 100644 --- a/cmd/api/api/cp.go +++ b/cmd/api/api/cp.go @@ -11,7 +11,6 @@ import ( "github.com/gorilla/websocket" "github.com/kernel/hypeman/lib/guest" - "github.com/kernel/hypeman/lib/hypervisor" "github.com/kernel/hypeman/lib/instances" "github.com/kernel/hypeman/lib/logger" mw "github.com/kernel/hypeman/lib/middleware" @@ -219,10 +218,9 @@ func (s *ApiService) CpHandler(w http.ResponseWriter, r *http.Request) { // handleCopyTo handles copying files from client to guest // Returns the number of bytes transferred and any error. 
func (s *ApiService) handleCopyTo(ctx context.Context, ws *websocket.Conn, inst *instances.Instance, req CpRequest) (int64, error) { - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - return 0, fmt.Errorf("create vsock dialer: %w", err) + return 0, fmt.Errorf("get vsock dialer: %w", err) } grpcConn, err := guest.GetOrCreateConn(ctx, dialer) @@ -329,10 +327,9 @@ func (s *ApiService) handleCopyTo(ctx context.Context, ws *websocket.Conn, inst // handleCopyFrom handles copying files from guest to client // Returns the number of bytes transferred and any error. func (s *ApiService) handleCopyFrom(ctx context.Context, ws *websocket.Conn, inst *instances.Instance, req CpRequest) (int64, error) { - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - return 0, fmt.Errorf("create vsock dialer: %w", err) + return 0, fmt.Errorf("get vsock dialer: %w", err) } grpcConn, err := guest.GetOrCreateConn(ctx, dialer) diff --git a/cmd/api/api/cp_test.go b/cmd/api/api/cp_test.go index 62e62ee2..22e6f515 100644 --- a/cmd/api/api/cp_test.go +++ b/cmd/api/api/cp_test.go @@ -19,7 +19,7 @@ import ( func TestCpToAndFromInstance(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { @@ -158,7 +158,7 @@ func TestCpToAndFromInstance(t *testing.T) { func TestCpDirectoryToInstance(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { diff --git a/cmd/api/api/exec.go b/cmd/api/api/exec.go index b9f5f3b3..b1e13c2c 100644 --- a/cmd/api/api/exec.go +++ b/cmd/api/api/exec.go @@ -12,7 +12,6 @@ import ( "github.com/gorilla/websocket" "github.com/kernel/hypeman/lib/guest" - "github.com/kernel/hypeman/lib/hypervisor" "github.com/kernel/hypeman/lib/instances" "github.com/kernel/hypeman/lib/logger" mw "github.com/kernel/hypeman/lib/middleware" @@ -132,10 +131,9 @@ func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) { // Create WebSocket read/writer wrapper that handles resize messages wsConn := &wsReadWriter{ws: ws, ctx: ctx, resizeChan: resizeChan} - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(hypervisor.Type(inst.HypervisorType), inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - log.ErrorContext(ctx, "failed to create vsock dialer", "error", err) + log.ErrorContext(ctx, "failed to get vsock dialer", "error", err) ws.WriteMessage(websocket.BinaryMessage, []byte(fmt.Sprintf("Error: %v\r\n", err))) ws.WriteMessage(websocket.TextMessage, []byte(`{"exitCode":127}`)) return diff --git a/cmd/api/api/exec_test.go b/cmd/api/api/exec_test.go index 1edf5eba..04706b34 100644 --- a/cmd/api/api/exec_test.go +++ b/cmd/api/api/exec_test.go @@ -20,7 +20,7 @@ import ( 
func TestExecInstanceNonTTY(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { @@ -160,7 +160,7 @@ func TestExecInstanceNonTTY(t *testing.T) { func TestExecWithDebianMinimal(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } if testing.Short() { diff --git a/cmd/api/api/images_test.go b/cmd/api/api/images_test.go index 86d1ff9e..5026646a 100644 --- a/cmd/api/api/images_test.go +++ b/cmd/api/api/images_test.go @@ -225,12 +225,13 @@ func TestCreateImage_Idempotent(t *testing.T) { t.Fatal("Build failed - this is the root cause of test failures") } - // Status can be "pending" (still processing) or "ready" (already completed in fast CI) + // Status can be "pending" (still queued), "pulling" (pull started), or "ready" (completed) // The key idempotency invariant is that the digest is the same (verified above) require.Contains(t, []oapi.ImageStatus{ oapi.ImageStatus(images.StatusPending), + oapi.ImageStatus(images.StatusPulling), oapi.ImageStatus(images.StatusReady), - }, img2.Status, "status should be pending or ready") + }, img2.Status, "status should be pending, pulling, or ready") // If still pending, should have queue position if img2.Status == oapi.ImageStatus(images.StatusPending) { diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 32826665..ca7c2fbc 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -621,13 +621,12 @@ func (s *ApiService) StatInstancePath(ctx context.Context, request oapi.StatInst }, nil } - // Create vsock dialer for this hypervisor type - dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + dialer, err := s.InstanceManager.GetVsockDialer(ctx, inst.Id) if err != nil { - log.ErrorContext(ctx, "failed to create vsock dialer", "error", err) + log.ErrorContext(ctx, "failed to get vsock dialer", "error", err) return oapi.StatInstancePath500JSONResponse{ Code: "internal_error", - Message: "failed to create vsock dialer", + Message: "failed to get vsock dialer", }, nil } diff --git a/cmd/api/api/instances_test.go b/cmd/api/api/instances_test.go index 005241a4..81af2bd5 100644 --- a/cmd/api/api/instances_test.go +++ b/cmd/api/api/instances_test.go @@ -35,7 +35,7 @@ func TestGetInstance_NotFound(t *testing.T) { func TestCreateInstance_ParsesHumanReadableSizes(t *testing.T) { // Require KVM access for VM creation if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) { - t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)") + t.Skip("/dev/kvm not available, skipping on this platform") } svc := newTestService(t) diff --git a/cmd/vz-shim/main.go b/cmd/vz-shim/main.go new file mode 100644 index 00000000..1a1c887f --- /dev/null +++ b/cmd/vz-shim/main.go @@ -0,0 +1,165 @@ +//go:build darwin + +// Package main implements hypeman-vz-shim, a subprocess that hosts vz VMs. +// This allows VMs to survive hypeman restarts by running in a separate process. 
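+//
+// Invocation sketch (the only flag is -config; field names come from
+// shimconfig.ShimConfig, values are illustrative):
+//
+//	vz-shim -config '{"vcpus":2,"memory_bytes":2147483648,
+//	  "kernel_path":"/path/vmlinux","initrd_path":"/path/initrd.img",
+//	  "control_socket":"/tmp/vz-control.sock","vsock_socket":"/tmp/vz-vsock.sock"}'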
+package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "log/slog" + "net" + "net/http" + "os" + "os/signal" + "path/filepath" + "syscall" + "time" + + "github.com/Code-Hex/vz/v3" + "github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig" +) + +func main() { + configJSON := flag.String("config", "", "VM configuration as JSON") + flag.Parse() + + if *configJSON == "" { + fmt.Fprintln(os.Stderr, "error: -config is required") + os.Exit(1) + } + + var config shimconfig.ShimConfig + if err := json.Unmarshal([]byte(*configJSON), &config); err != nil { + fmt.Fprintf(os.Stderr, "error: invalid config JSON: %v\n", err) + os.Exit(1) + } + + // Setup logging to file + if err := setupLogging(config.LogPath); err != nil { + fmt.Fprintf(os.Stderr, "error: setup logging: %v\n", err) + os.Exit(1) + } + + slog.Info("vz-shim starting", "control_socket", config.ControlSocket, "vsock_socket", config.VsockSocket) + + // Create the VM + vm, vmConfig, err := createVM(config) + if err != nil { + slog.Error("failed to create VM", "error", err) + fmt.Fprintf(os.Stderr, "failed to create VM: %v\n", err) + os.Exit(1) + } + + if err := vm.Start(); err != nil { + slog.Error("failed to start VM", "error", err) + fmt.Fprintf(os.Stderr, "failed to start VM: %v\n", err) + os.Exit(1) + } + slog.Info("VM started", "vcpus", config.VCPUs, "memory_mb", config.MemoryBytes/1024/1024) + + // Create the shim server + server := NewShimServer(vm, vmConfig) + + // Start control socket listener (remove stale socket from previous run) + os.Remove(config.ControlSocket) + controlListener, err := net.Listen("unix", config.ControlSocket) + if err != nil { + slog.Error("failed to listen on control socket", "error", err, "path", config.ControlSocket) + os.Exit(1) + } + defer controlListener.Close() + + // Start vsock proxy listener (remove stale socket from previous run) + os.Remove(config.VsockSocket) + vsockListener, err := net.Listen("unix", config.VsockSocket) + if err != nil { + slog.Error("failed to listen on vsock socket", "error", err, "path", config.VsockSocket) + os.Exit(1) + } + defer vsockListener.Close() + + // Start HTTP server for control API + httpServer := &http.Server{Handler: server.Handler()} + go func() { + slog.Info("control API listening", "socket", config.ControlSocket) + if err := httpServer.Serve(controlListener); err != nil && err != http.ErrServerClosed { + slog.Error("control API server error", "error", err) + } + }() + + // Start vsock proxy + go func() { + slog.Info("vsock proxy listening", "socket", config.VsockSocket) + server.ServeVsock(vsockListener) + }() + + // Wait for shutdown signal or VM stop + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) + + // Monitor VM state + stateChanged := vm.StateChangedNotify() + go func() { + for { + select { + case <-ctx.Done(): + return + case newState := <-stateChanged: + slog.Info("VM state changed", "state", newState) + if newState == vz.VirtualMachineStateStopped || newState == vz.VirtualMachineStateError { + slog.Info("VM stopped, shutting down shim") + cancel() + return + } + } + } + }() + + select { + case sig := <-sigChan: + slog.Info("received signal, shutting down", "signal", sig) + case <-ctx.Done(): + slog.Info("context cancelled, shutting down") + } + + // Graceful shutdown + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer shutdownCancel() + + 
httpServer.Shutdown(shutdownCtx) + + if vm.State() == vz.VirtualMachineStateRunning { + slog.Info("stopping VM") + if vm.CanStop() { + vm.Stop() + } + } + + slog.Info("vz-shim shutdown complete") +} + +func setupLogging(logPath string) error { + if logPath == "" { + // Log to stderr if no path specified + slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))) + return nil + } + + if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil { + return fmt.Errorf("create log directory: %w", err) + } + + file, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return fmt.Errorf("open log file: %w", err) + } + + slog.SetDefault(slog.New(slog.NewJSONHandler(file, &slog.HandlerOptions{Level: slog.LevelDebug}))) + return nil +} diff --git a/cmd/vz-shim/server.go b/cmd/vz-shim/server.go new file mode 100644 index 00000000..fe5ab603 --- /dev/null +++ b/cmd/vz-shim/server.go @@ -0,0 +1,257 @@ +//go:build darwin + +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "log/slog" + "net" + "net/http" + "sync" + + "github.com/Code-Hex/vz/v3" +) + +// ShimServer handles control API and vsock proxy for a vz VM. +type ShimServer struct { + vm *vz.VirtualMachine + vmConfig *vz.VirtualMachineConfiguration + mu sync.RWMutex +} + +// NewShimServer creates a new shim server. +func NewShimServer(vm *vz.VirtualMachine, vmConfig *vz.VirtualMachineConfiguration) *ShimServer { + return &ShimServer{ + vm: vm, + vmConfig: vmConfig, + } +} + +// VMInfoResponse matches the cloud-hypervisor VmInfo structure. +type VMInfoResponse struct { + State string `json:"state"` +} + +// Handler returns the HTTP handler for the control API. +func (s *ShimServer) Handler() http.Handler { + mux := http.NewServeMux() + + // Match cloud-hypervisor API patterns + mux.HandleFunc("GET /api/v1/vm.info", s.handleVMInfo) + mux.HandleFunc("PUT /api/v1/vm.pause", s.handlePause) + mux.HandleFunc("PUT /api/v1/vm.resume", s.handleResume) + mux.HandleFunc("PUT /api/v1/vm.shutdown", s.handleShutdown) + mux.HandleFunc("PUT /api/v1/vm.power-button", s.handlePowerButton) + mux.HandleFunc("GET /api/v1/vmm.ping", s.handlePing) + mux.HandleFunc("PUT /api/v1/vmm.shutdown", s.handleVMMShutdown) + + return mux +} + +func (s *ShimServer) handleVMInfo(w http.ResponseWriter, r *http.Request) { + s.mu.RLock() + defer s.mu.RUnlock() + + state := vzStateToString(s.vm.State()) + resp := VMInfoResponse{State: state} + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +func (s *ShimServer) handlePause(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.vm.CanPause() { + http.Error(w, "cannot pause VM", http.StatusBadRequest) + return + } + + if err := s.vm.Pause(); err != nil { + slog.Error("failed to pause VM", "error", err) + http.Error(w, fmt.Sprintf("pause failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("VM paused") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handleResume(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.vm.CanResume() { + http.Error(w, "cannot resume VM", http.StatusBadRequest) + return + } + + if err := s.vm.Resume(); err != nil { + slog.Error("failed to resume VM", "error", err) + http.Error(w, fmt.Sprintf("resume failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("VM resumed") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) 
handleShutdown(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + // Send ACPI power button (graceful shutdown signal to guest). + // The caller (instance manager) handles timeout/force-kill if the guest + // doesn't shut down in time. Force-kill is in handleVMMShutdown / killHypervisor. + success, err := s.vm.RequestStop() + if err != nil || !success { + slog.Error("RequestStop failed", "error", err, "success", success) + http.Error(w, fmt.Sprintf("shutdown failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("VM graceful shutdown requested (ACPI power button)") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handlePowerButton(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + // RequestStop sends an ACPI power button event + success, err := s.vm.RequestStop() + if err != nil || !success { + slog.Error("failed to send power button", "error", err, "success", success) + http.Error(w, fmt.Sprintf("power button failed: %v", err), http.StatusInternalServerError) + return + } + + slog.Info("power button sent") + w.WriteHeader(http.StatusNoContent) +} + +func (s *ShimServer) handlePing(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("OK")) +} + +func (s *ShimServer) handleVMMShutdown(w http.ResponseWriter, r *http.Request) { + slog.Info("VMM shutdown requested") + w.WriteHeader(http.StatusNoContent) + + // Stop the VM and exit + go func() { + s.mu.Lock() + defer s.mu.Unlock() + + if s.vm.CanStop() { + s.vm.Stop() + } + // Process will exit when VM stops (monitored in main) + }() +} + +func vzStateToString(state vz.VirtualMachineState) string { + switch state { + case vz.VirtualMachineStateStopped: + return "Shutdown" + case vz.VirtualMachineStateRunning: + return "Running" + case vz.VirtualMachineStatePaused: + return "Paused" + case vz.VirtualMachineStateError: + return "Error" + case vz.VirtualMachineStateStarting: + return "Starting" + case vz.VirtualMachineStatePausing: + return "Pausing" + case vz.VirtualMachineStateResuming: + return "Resuming" + case vz.VirtualMachineStateStopping: + return "Stopping" + default: + return "Unknown" + } +} + +// ServeVsock handles vsock proxy connections using the Cloud Hypervisor protocol. +// Protocol: Client sends "CONNECT {port}\n", server responds "OK {port}\n", then proxies. 
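+//
+// Example exchange (port number illustrative):
+//
+//	client: "CONNECT 1024\n"
+//	server: "OK 1024\n"
+//	(raw bidirectional byte stream follows until either side closes)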
+func (s *ShimServer) ServeVsock(listener net.Listener) { + for { + conn, err := listener.Accept() + if err != nil { + slog.Debug("vsock listener closed", "error", err) + return + } + go s.handleVsockConnection(conn) + } +} + +func (s *ShimServer) handleVsockConnection(conn net.Conn) { + defer conn.Close() + + // Read the CONNECT command + reader := bufio.NewReader(conn) + cmd, err := reader.ReadString('\n') + if err != nil { + slog.Error("failed to read vsock handshake", "error", err) + return + } + + // Parse "CONNECT {port}\n" + var port uint32 + if _, err := fmt.Sscanf(cmd, "CONNECT %d\n", &port); err != nil { + slog.Error("invalid vsock handshake", "cmd", cmd, "error", err) + conn.Write([]byte(fmt.Sprintf("ERR invalid command: %s", cmd))) + return + } + + slog.Debug("vsock connect request", "port", port) + + // Get vsock device and connect to guest + s.mu.RLock() + socketDevices := s.vm.SocketDevices() + s.mu.RUnlock() + + if len(socketDevices) == 0 { + slog.Error("no vsock device configured") + conn.Write([]byte("ERR no vsock device\n")) + return + } + + guestConn, err := socketDevices[0].Connect(port) + if err != nil { + slog.Error("failed to connect to guest vsock", "port", port, "error", err) + conn.Write([]byte(fmt.Sprintf("ERR connect failed: %v\n", err))) + return + } + defer guestConn.Close() + + // Send OK response (matching CH protocol) + if _, err := conn.Write([]byte(fmt.Sprintf("OK %d\n", port))); err != nil { + slog.Error("failed to send OK response", "error", err) + return + } + + slog.Debug("vsock connection established", "port", port) + + // Proxy data bidirectionally + done := make(chan struct{}, 2) + + go func() { + io.Copy(guestConn, reader) + done <- struct{}{} + }() + + go func() { + io.Copy(conn, guestConn) + done <- struct{}{} + }() + + // Wait for one direction to close + <-done +} diff --git a/cmd/vz-shim/vm.go b/cmd/vz-shim/vm.go new file mode 100644 index 00000000..b995f0c7 --- /dev/null +++ b/cmd/vz-shim/vm.go @@ -0,0 +1,273 @@ +//go:build darwin + +package main + +import ( + "fmt" + "log/slog" + "net" + "os" + "runtime" + "strings" + + "github.com/Code-Hex/vz/v3" + "github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig" +) + +// createVM creates and configures a vz.VirtualMachine from ShimConfig. 
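+// Devices are attached in this order: serial console, NAT network interfaces,
+// virtio entropy, virtio-blk disks, vsock, and a best-effort memory balloon.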
+func createVM(config shimconfig.ShimConfig) (*vz.VirtualMachine, *vz.VirtualMachineConfiguration, error) { + // Prepare kernel command line (vz uses hvc0 for serial console) + kernelArgs := config.KernelArgs + if kernelArgs == "" { + kernelArgs = "console=hvc0 root=/dev/vda" + } else { + kernelArgs = strings.ReplaceAll(kernelArgs, "console=ttyS0", "console=hvc0") + } + + bootLoader, err := vz.NewLinuxBootLoader( + config.KernelPath, + vz.WithCommandLine(kernelArgs), + vz.WithInitrd(config.InitrdPath), + ) + if err != nil { + return nil, nil, fmt.Errorf("create boot loader: %w", err) + } + + vcpus := computeCPUCount(config.VCPUs) + memoryBytes := computeMemorySize(uint64(config.MemoryBytes)) + + slog.Debug("VM config", "vcpus", vcpus, "memory_bytes", memoryBytes, "kernel", config.KernelPath, "initrd", config.InitrdPath) + + vmConfig, err := vz.NewVirtualMachineConfiguration(bootLoader, vcpus, memoryBytes) + if err != nil { + return nil, nil, fmt.Errorf("create vm configuration: %w", err) + } + + if err := configureSerialConsole(vmConfig, config.SerialLogPath); err != nil { + return nil, nil, fmt.Errorf("configure serial: %w", err) + } + + if err := configureNetwork(vmConfig, config.Networks); err != nil { + return nil, nil, fmt.Errorf("configure network: %w", err) + } + + entropyConfig, err := vz.NewVirtioEntropyDeviceConfiguration() + if err != nil { + return nil, nil, fmt.Errorf("create entropy device: %w", err) + } + vmConfig.SetEntropyDevicesVirtualMachineConfiguration([]*vz.VirtioEntropyDeviceConfiguration{entropyConfig}) + + if err := configureStorage(vmConfig, config.Disks); err != nil { + return nil, nil, fmt.Errorf("configure storage: %w", err) + } + + vsockConfig, err := vz.NewVirtioSocketDeviceConfiguration() + if err != nil { + return nil, nil, fmt.Errorf("create vsock device: %w", err) + } + vmConfig.SetSocketDevicesVirtualMachineConfiguration([]vz.SocketDeviceConfiguration{vsockConfig}) + + if balloonConfig, err := vz.NewVirtioTraditionalMemoryBalloonDeviceConfiguration(); err == nil { + vmConfig.SetMemoryBalloonDevicesVirtualMachineConfiguration([]vz.MemoryBalloonDeviceConfiguration{balloonConfig}) + } + + if validated, err := vmConfig.Validate(); !validated || err != nil { + return nil, nil, fmt.Errorf("invalid vm configuration: %w", err) + } + + vm, err := vz.NewVirtualMachine(vmConfig) + if err != nil { + return nil, nil, fmt.Errorf("create virtual machine: %w", err) + } + + return vm, vmConfig, nil +} + +func configureSerialConsole(vmConfig *vz.VirtualMachineConfiguration, logPath string) error { + var serialAttachment *vz.FileHandleSerialPortAttachment + + nullRead, err := os.OpenFile("/dev/null", os.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("open /dev/null for reading: %w", err) + } + + if logPath != "" { + file, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + nullRead.Close() + return fmt.Errorf("open serial log file: %w", err) + } + serialAttachment, err = vz.NewFileHandleSerialPortAttachment(nullRead, file) + if err != nil { + nullRead.Close() + file.Close() + return fmt.Errorf("create serial attachment: %w", err) + } + } else { + nullWrite, err := os.OpenFile("/dev/null", os.O_WRONLY, 0) + if err != nil { + nullRead.Close() + return fmt.Errorf("open /dev/null for writing: %w", err) + } + serialAttachment, err = vz.NewFileHandleSerialPortAttachment(nullRead, nullWrite) + if err != nil { + nullRead.Close() + nullWrite.Close() + return fmt.Errorf("create serial attachment: %w", err) + } + } + + consoleConfig, err := 
vz.NewVirtioConsoleDeviceSerialPortConfiguration(serialAttachment) + if err != nil { + return fmt.Errorf("create console config: %w", err) + } + vmConfig.SetSerialPortsVirtualMachineConfiguration([]*vz.VirtioConsoleDeviceSerialPortConfiguration{ + consoleConfig, + }) + + return nil +} + +func configureNetwork(vmConfig *vz.VirtualMachineConfiguration, networks []shimconfig.NetworkConfig) error { + if len(networks) == 0 { + // No networks configured (NetworkEnabled=false) — do not attach any NIC. + return nil + } + + var devices []*vz.VirtioNetworkDeviceConfiguration + for _, netConfig := range networks { + dev, err := createNATNetworkDevice(netConfig.MAC) + if err != nil { + return err + } + devices = append(devices, dev) + } + vmConfig.SetNetworkDevicesVirtualMachineConfiguration(devices) + return nil +} + +func createNATNetworkDevice(macAddr string) (*vz.VirtioNetworkDeviceConfiguration, error) { + natAttachment, err := vz.NewNATNetworkDeviceAttachment() + if err != nil { + return nil, fmt.Errorf("create NAT attachment: %w", err) + } + + networkConfig, err := vz.NewVirtioNetworkDeviceConfiguration(natAttachment) + if err != nil { + return nil, fmt.Errorf("create network config: %w", err) + } + + mac, err := assignMACAddress(macAddr) + if err != nil { + return nil, err + } + networkConfig.SetMACAddress(mac) + + return networkConfig, nil +} + +func assignMACAddress(macAddr string) (*vz.MACAddress, error) { + if macAddr == "" { + mac, err := vz.NewRandomLocallyAdministeredMACAddress() + if err != nil { + return nil, fmt.Errorf("generate MAC address: %w", err) + } + slog.Info("generated random MAC address", "mac", mac.String()) + return mac, nil + } + + hwAddr, err := net.ParseMAC(macAddr) + if err != nil { + slog.Warn("failed to parse MAC address, generating random", "mac", macAddr, "error", err) + mac, err := vz.NewRandomLocallyAdministeredMACAddress() + if err != nil { + return nil, fmt.Errorf("generate MAC address: %w", err) + } + return mac, nil + } + + mac, err := vz.NewMACAddress(hwAddr) + if err != nil { + slog.Warn("failed to create MAC from parsed address, generating random", "mac", macAddr, "error", err) + mac, err := vz.NewRandomLocallyAdministeredMACAddress() + if err != nil { + return nil, fmt.Errorf("generate MAC address: %w", err) + } + return mac, nil + } + + slog.Info("using specified MAC address", "mac", macAddr) + return mac, nil +} + +func configureStorage(vmConfig *vz.VirtualMachineConfiguration, disks []shimconfig.DiskConfig) error { + var storageDevices []vz.StorageDeviceConfiguration + + for _, disk := range disks { + if _, err := os.Stat(disk.Path); os.IsNotExist(err) { + return fmt.Errorf("disk image not found: %s", disk.Path) + } + + if strings.HasSuffix(disk.Path, ".qcow2") { + return fmt.Errorf("qcow2 not supported by vz, use raw format: %s", disk.Path) + } + + attachment, err := vz.NewDiskImageStorageDeviceAttachment(disk.Path, disk.Readonly) + if err != nil { + return fmt.Errorf("create disk attachment for %s: %w", disk.Path, err) + } + + blockConfig, err := vz.NewVirtioBlockDeviceConfiguration(attachment) + if err != nil { + return fmt.Errorf("create block device config: %w", err) + } + + storageDevices = append(storageDevices, blockConfig) + } + + if len(storageDevices) > 0 { + vmConfig.SetStorageDevicesVirtualMachineConfiguration(storageDevices) + } + + return nil +} + +func computeCPUCount(requested int) uint { + virtualCPUCount := uint(requested) + if virtualCPUCount == 0 { + virtualCPUCount = uint(runtime.NumCPU() - 1) + if virtualCPUCount < 1 { + 
virtualCPUCount = 1 + } + } + + maxAllowed := vz.VirtualMachineConfigurationMaximumAllowedCPUCount() + minAllowed := vz.VirtualMachineConfigurationMinimumAllowedCPUCount() + + if virtualCPUCount > maxAllowed { + virtualCPUCount = maxAllowed + } + if virtualCPUCount < minAllowed { + virtualCPUCount = minAllowed + } + + return virtualCPUCount +} + +func computeMemorySize(requested uint64) uint64 { + if requested == 0 { + requested = 2 * 1024 * 1024 * 1024 // 2GB safety default (caller normally provides this) + } + + maxAllowed := vz.VirtualMachineConfigurationMaximumAllowedMemorySize() + minAllowed := vz.VirtualMachineConfigurationMinimumAllowedMemorySize() + + if requested > maxAllowed { + requested = maxAllowed + } + if requested < minAllowed { + requested = minAllowed + } + + return requested +} diff --git a/go.mod b/go.mod index d6691b8a..f6ce5e86 100644 --- a/go.mod +++ b/go.mod @@ -56,6 +56,8 @@ require ( require ( github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Code-Hex/go-infinity-channel v1.0.0 // indirect + github.com/Code-Hex/vz/v3 v3.7.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apex/log v1.9.0 // indirect diff --git a/go.sum b/go.sum index a369c838..d33f256d 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,10 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8af github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Code-Hex/go-infinity-channel v1.0.0 h1:M8BWlfDOxq9or9yvF9+YkceoTkDI1pFAqvnP87Zh0Nw= +github.com/Code-Hex/go-infinity-channel v1.0.0/go.mod h1:5yUVg/Fqao9dAjcpzoQ33WwfdMWmISOrQloDRn3bsvY= +github.com/Code-Hex/vz/v3 v3.7.1 h1:EN1yNiyrbPq+dl388nne2NySo8I94EnPppvqypA65XM= +github.com/Code-Hex/vz/v3 v3.7.1/go.mod h1:1LsW0jqW0r0cQ+IeR4hHbjdqOtSidNCVMWhStMHGho8= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= diff --git a/lib/hypervisor/README.md b/lib/hypervisor/README.md index 2bab53d9..22e6dc3c 100644 --- a/lib/hypervisor/README.md +++ b/lib/hypervisor/README.md @@ -4,20 +4,29 @@ Provides a common interface for VM management across different hypervisors. ## Purpose -Hypeman originally supported only Cloud Hypervisor. This abstraction layer allows supporting multiple hypervisors (e.g., QEMU) through a unified interface, enabling: +Hypeman originally supported only Cloud Hypervisor. 
This abstraction layer allows supporting multiple hypervisors through a unified interface, enabling: - **Hypervisor choice per instance** - Different instances can use different hypervisors +- **Platform support** - Linux uses Cloud Hypervisor/QEMU, macOS uses Virtualization.framework - **Feature parity where possible** - Common operations work the same way - **Graceful degradation** - Features unsupported by a hypervisor can be detected and handled +## Implementations + +| Hypervisor | Platform | Process Model | Control Interface | +|------------|----------|---------------|-------------------| +| Cloud Hypervisor | Linux | External process | HTTP API over Unix socket | +| QEMU | Linux | External process | QMP over Unix socket | +| vz | macOS | Subprocess (vz-shim) | HTTP API over Unix socket | + ## How It Works The abstraction defines two key interfaces: 1. **Hypervisor** - VM lifecycle operations (create, boot, pause, resume, snapshot, restore, shutdown) -2. **ProcessManager** - Hypervisor process lifecycle (start binary, get binary path) +2. **VMStarter** - VM startup and configuration (start binary, get binary path) -Each hypervisor implementation translates the generic configuration and operations to its native format. For example, Cloud Hypervisor uses an HTTP API over a Unix socket, while QEMU would use QMP. +Each implementation translates generic configuration to its native format. Cloud Hypervisor and QEMU run as external processes with socket-based control. The vz implementation runs VMs as separate vz-shim subprocesses using Apple's Virtualization.framework. Before using optional features, callers check capabilities: @@ -27,6 +36,19 @@ if hv.Capabilities().SupportsSnapshot { } ``` +## Platform Differences + +### Linux (Cloud Hypervisor, QEMU) +- VMs run as separate processes with PIDs +- State persists across hypeman restarts (reconnect via socket) +- TAP devices and Linux bridges for networking + +### macOS (vz) +- VMs run as separate vz-shim subprocesses (detached process group) +- State persists across hypeman restarts (reconnect via socket) +- NAT networking via Virtualization.framework +- Requires code signing with virtualization entitlement + ## Hypervisor Switching Instances store their hypervisor type in metadata. An instance can switch hypervisors only when stopped (no running VM, no snapshot), since: diff --git a/lib/hypervisor/vz/client.go b/lib/hypervisor/vz/client.go new file mode 100644 index 00000000..5f9ec0d1 --- /dev/null +++ b/lib/hypervisor/vz/client.go @@ -0,0 +1,172 @@ +//go:build darwin + +package vz + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" +) + +// Client implements hypervisor.Hypervisor via HTTP to the vz-shim process. +type Client struct { + socketPath string + httpClient *http.Client +} + +// NewClient creates a new vz shim client. 
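+// The "vz-shim" host used in request URLs is a placeholder: the transport's
+// DialContext always dials the Unix socket at socketPath. NewClient fails fast
+// if the shim does not answer /api/v1/vmm.ping within 5 seconds.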
+func NewClient(socketPath string) (*Client, error) { + transport := &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + return net.Dial("unix", socketPath) + }, + } + httpClient := &http.Client{ + Transport: transport, + Timeout: 10 * time.Second, + } + + // Verify connectivity with a short timeout + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://vz-shim/api/v1/vmm.ping", nil) + if err != nil { + return nil, fmt.Errorf("ping shim: %w", err) + } + resp, err := httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("ping shim: %w", err) + } + resp.Body.Close() + + return &Client{ + socketPath: socketPath, + httpClient: httpClient, + }, nil +} + +var _ hypervisor.Hypervisor = (*Client)(nil) + +// vmInfoResponse matches the shim's VMInfoResponse structure. +type vmInfoResponse struct { + State string `json:"state"` +} + +func (c *Client) Capabilities() hypervisor.Capabilities { + return hypervisor.Capabilities{ + SupportsSnapshot: false, + SupportsHotplugMemory: false, + SupportsPause: true, + SupportsVsock: true, + SupportsGPUPassthrough: false, + SupportsDiskIOLimit: false, + } +} + +// doPut sends a PUT request to the shim and checks for success. +func (c *Client) doPut(ctx context.Context, path string, body io.Reader) error { + req, err := http.NewRequestWithContext(ctx, http.MethodPut, "http://vz-shim"+path, body) + if err != nil { + return err + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + resp, err := c.httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNoContent && resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("%s failed with status %d: %s", path, resp.StatusCode, string(bodyBytes)) + } + return nil +} + +// doGet sends a GET request to the shim and returns the response body. 
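+// Unlike doPut, it does not check the HTTP status code; callers interpret the body.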
+func (c *Client) doGet(ctx context.Context, path string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://vz-shim"+path, nil) + if err != nil { + return nil, err + } + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return io.ReadAll(resp.Body) +} + +func (c *Client) DeleteVM(ctx context.Context) error { + return c.doPut(ctx, "/api/v1/vm.shutdown", nil) +} + +func (c *Client) Shutdown(ctx context.Context) error { + req, err := http.NewRequestWithContext(ctx, http.MethodPut, "http://vz-shim/api/v1/vmm.shutdown", nil) + if err != nil { + return err + } + resp, err := c.httpClient.Do(req) + if err != nil { + // Connection reset is expected when shim exits + return nil + } + defer resp.Body.Close() + return nil +} + +func (c *Client) GetVMInfo(ctx context.Context) (*hypervisor.VMInfo, error) { + body, err := c.doGet(ctx, "/api/v1/vm.info") + if err != nil { + return nil, fmt.Errorf("get vm info: %w", err) + } + + var info vmInfoResponse + if err := json.Unmarshal(body, &info); err != nil { + return nil, fmt.Errorf("decode vm info: %w", err) + } + + var state hypervisor.VMState + switch info.State { + case "Running": + state = hypervisor.StateRunning + case "Paused": + state = hypervisor.StatePaused + case "Starting": + state = hypervisor.StateCreated + case "Shutdown", "Stopped", "Error": + state = hypervisor.StateShutdown + default: + state = hypervisor.StateShutdown + } + + return &hypervisor.VMInfo{State: state}, nil +} + +func (c *Client) Pause(ctx context.Context) error { + return c.doPut(ctx, "/api/v1/vm.pause", nil) +} + +func (c *Client) Resume(ctx context.Context) error { + return c.doPut(ctx, "/api/v1/vm.resume", nil) +} + +func (c *Client) Snapshot(ctx context.Context, destPath string) error { + return hypervisor.ErrNotSupported +} + +func (c *Client) ResizeMemory(ctx context.Context, bytes int64) error { + return hypervisor.ErrNotSupported +} + +func (c *Client) ResizeMemoryAndWait(ctx context.Context, bytes int64, timeout time.Duration) error { + return hypervisor.ErrNotSupported +} diff --git a/lib/hypervisor/vz/shimconfig/config.go b/lib/hypervisor/vz/shimconfig/config.go new file mode 100644 index 00000000..23056267 --- /dev/null +++ b/lib/hypervisor/vz/shimconfig/config.go @@ -0,0 +1,44 @@ +//go:build darwin + +// Package shimconfig defines the configuration types shared between +// the hypeman API server and the vz-shim subprocess. +package shimconfig + +// ShimConfig is the configuration passed from hypeman to the shim. +type ShimConfig struct { + // Compute resources + VCPUs int `json:"vcpus"` + MemoryBytes int64 `json:"memory_bytes"` + + // Storage + Disks []DiskConfig `json:"disks"` + + // Network + Networks []NetworkConfig `json:"networks"` + + // Console + SerialLogPath string `json:"serial_log_path"` + + // Boot configuration + KernelPath string `json:"kernel_path"` + InitrdPath string `json:"initrd_path"` + KernelArgs string `json:"kernel_args"` + + // Socket paths (where shim should listen) + ControlSocket string `json:"control_socket"` + VsockSocket string `json:"vsock_socket"` + + // Logging + LogPath string `json:"log_path"` +} + +// DiskConfig represents a disk attached to the VM. +type DiskConfig struct { + Path string `json:"path"` + Readonly bool `json:"readonly"` +} + +// NetworkConfig represents a network interface. 
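+// Only the MAC address is needed: vz attaches every interface to the
+// framework's built-in NAT network, so there is no TAP device or bridge to
+// describe here (unlike the Linux hypervisors).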
+type NetworkConfig struct { + MAC string `json:"mac"` +} diff --git a/lib/hypervisor/vz/starter.go b/lib/hypervisor/vz/starter.go new file mode 100644 index 00000000..e80260b0 --- /dev/null +++ b/lib/hypervisor/vz/starter.go @@ -0,0 +1,239 @@ +//go:build darwin + +// Package vz implements the hypervisor.Hypervisor interface for +// Apple's Virtualization.framework on macOS via the vz-shim subprocess. +package vz + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig" + "github.com/kernel/hypeman/lib/logger" + "github.com/kernel/hypeman/lib/paths" +) + +func init() { + hypervisor.RegisterSocketName(hypervisor.TypeVZ, "vz.sock") + hypervisor.RegisterVsockDialerFactory(hypervisor.TypeVZ, NewVsockDialer) + hypervisor.RegisterClientFactory(hypervisor.TypeVZ, func(socketPath string) (hypervisor.Hypervisor, error) { + return NewClient(socketPath) + }) +} + +var ( + shimOnce sync.Once + shimPath string + shimErr error +) + +// extractShim extracts the embedded vz-shim binary to a temp file and codesigns it. +func extractShim() (string, error) { + shimOnce.Do(func() { + f, err := os.CreateTemp("", "vz-shim-*") + if err != nil { + shimErr = fmt.Errorf("create temp file: %w", err) + return + } + defer f.Close() + + if _, err := f.Write(vzShimBinary); err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("write vz-shim binary: %w", err) + return + } + + if err := f.Chmod(0755); err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("chmod vz-shim binary: %w", err) + return + } + + // Write embedded entitlements to a temp file for codesigning + entFile, err := os.CreateTemp("", "vz-entitlements-*.plist") + if err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("create entitlements temp file: %w", err) + return + } + defer os.Remove(entFile.Name()) + + if _, err := entFile.Write(vzEntitlements); err != nil { + os.Remove(f.Name()) + entFile.Close() + shimErr = fmt.Errorf("write entitlements file: %w", err) + return + } + entFile.Close() + + // Codesign with entitlements for Virtualization.framework + cmd := exec.Command("codesign", "--sign", "-", "--entitlements", entFile.Name(), "--force", f.Name()) + if out, err := cmd.CombinedOutput(); err != nil { + os.Remove(f.Name()) + shimErr = fmt.Errorf("codesign vz-shim: %s: %w", string(out), err) + return + } + + shimPath = f.Name() + }) + return shimPath, shimErr +} + +// Starter implements hypervisor.VMStarter for Virtualization.framework. +type Starter struct{} + +// NewStarter creates a new vz starter. +func NewStarter() *Starter { + return &Starter{} +} + +var _ hypervisor.VMStarter = (*Starter)(nil) + +func (s *Starter) SocketName() string { + return "vz.sock" +} + +// GetBinaryPath extracts the embedded vz-shim and returns its path. +func (s *Starter) GetBinaryPath(p *paths.Paths, version string) (string, error) { + return extractShim() +} + +// GetVersion returns "vz-shim". +func (s *Starter) GetVersion(p *paths.Paths) (string, error) { + return "vz-shim", nil +} + +// StartVM spawns a vz-shim subprocess to host the VM. 
+func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, socketPath string, config hypervisor.VMConfig) (int, hypervisor.Hypervisor, error) { + log := logger.FromContext(ctx) + + instanceDir := filepath.Dir(socketPath) + controlSocket := socketPath + vsockSocket := filepath.Join(instanceDir, "vz.vsock") + logPath := filepath.Join(instanceDir, "logs", "vz-shim.log") + + shimConfig := shimconfig.ShimConfig{ + VCPUs: config.VCPUs, + MemoryBytes: config.MemoryBytes, + SerialLogPath: config.SerialLogPath, + KernelPath: config.KernelPath, + InitrdPath: config.InitrdPath, + KernelArgs: config.KernelArgs, + ControlSocket: controlSocket, + VsockSocket: vsockSocket, + LogPath: logPath, + } + + for _, disk := range config.Disks { + shimConfig.Disks = append(shimConfig.Disks, shimconfig.DiskConfig{ + Path: disk.Path, + Readonly: disk.Readonly, + }) + } + + for _, net := range config.Networks { + shimConfig.Networks = append(shimConfig.Networks, shimconfig.NetworkConfig{ + MAC: net.MAC, + }) + } + + configJSON, err := json.Marshal(shimConfig) + if err != nil { + return 0, nil, fmt.Errorf("marshal shim config: %w", err) + } + + log.DebugContext(ctx, "spawning vz-shim", "config", string(configJSON)) + + shimBinary, err := s.GetBinaryPath(p, version) + if err != nil { + return 0, nil, fmt.Errorf("get vz-shim binary: %w", err) + } + + var shimStderr bytes.Buffer + cmd := exec.Command(shimBinary, "-config", string(configJSON)) + cmd.Stdout = nil + cmd.Stderr = &shimStderr + cmd.Stdin = nil + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + if err := cmd.Start(); err != nil { + return 0, nil, fmt.Errorf("start vz-shim: %w", err) + } + + pid := cmd.Process.Pid + log.InfoContext(ctx, "vz-shim started", "pid", pid, "control_socket", controlSocket) + + // Wait for shim in a goroutine so we can detect early exit + waitDone := make(chan error, 1) + go func() { waitDone <- cmd.Wait() }() + + client, err := s.waitForShim(ctx, controlSocket, 30*time.Second) + if err != nil { + // Read shim log file for diagnostics (before instance dir cleanup deletes it) + shimLog := "" + if logData, readErr := os.ReadFile(logPath); readErr == nil && len(logData) > 0 { + shimLog = string(logData) + } + + // Check if shim already exited (crashed during startup) + select { + case waitErr := <-waitDone: + stderr := shimStderr.String() + details := "" + if stderr != "" { + details += fmt.Sprintf(" (stderr: %s)", stderr) + } + if shimLog != "" { + details += fmt.Sprintf(" (shim log: %s)", shimLog) + } + return 0, nil, fmt.Errorf("vz-shim exited early: %v%s", waitErr, details) + default: + // Shim still running but socket not available + cmd.Process.Kill() + <-waitDone + } + if shimLog != "" { + return 0, nil, fmt.Errorf("connect to vz-shim: %w (shim log: %s)", err, shimLog) + } + return 0, nil, fmt.Errorf("connect to vz-shim: %w", err) + } + + return pid, client, nil +} + +// RestoreVM is not supported by vz (Virtualization.framework cannot restore Linux guests). 
+func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) { + return 0, nil, hypervisor.ErrNotSupported +} + +func (s *Starter) waitForShim(ctx context.Context, socketPath string, timeout time.Duration) (*Client, error) { + deadline := time.Now().Add(timeout) + + for time.Now().Before(deadline) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + client, err := NewClient(socketPath) + if err == nil { + return client, nil + } + + time.Sleep(100 * time.Millisecond) + } + + return nil, fmt.Errorf("timeout waiting for shim socket: %s", socketPath) +} diff --git a/lib/hypervisor/vz/vsock.go b/lib/hypervisor/vz/vsock.go new file mode 100644 index 00000000..2243ab7d --- /dev/null +++ b/lib/hypervisor/vz/vsock.go @@ -0,0 +1,111 @@ +//go:build darwin + +package vz + +import ( + "bufio" + "context" + "fmt" + "log/slog" + "net" + "strings" + "time" + + "github.com/kernel/hypeman/lib/hypervisor" +) + +const ( + vsockDialTimeout = 5 * time.Second + vsockHandshakeTimeout = 5 * time.Second +) + +// VsockDialer implements hypervisor.VsockDialer for vz via the shim's Unix socket proxy. +// Uses the same protocol as Cloud Hypervisor: CONNECT {port}\n -> OK {port}\n +type VsockDialer struct { + socketPath string // path to vz.vsock Unix socket +} + +// NewVsockDialer creates a new VsockDialer for vz. +// vsockSocket is the path to the vz.vsock Unix socket proxy. +// vsockCID is unused because the vz proxy is per-VM (unlike QEMU which uses kernel AF_VSOCK with CID routing). +func NewVsockDialer(vsockSocket string, vsockCID int64) hypervisor.VsockDialer { + return &VsockDialer{ + socketPath: vsockSocket, + } +} + +// Key returns a unique identifier for this dialer, used for connection pooling. +func (d *VsockDialer) Key() string { + return "vz:" + d.socketPath +} + +// DialVsock connects to the guest on the specified port via the shim's vsock proxy. 
+func (d *VsockDialer) DialVsock(ctx context.Context, port int) (net.Conn, error) {
+	slog.DebugContext(ctx, "connecting to vsock via shim proxy", "socket", d.socketPath, "port", port)
+
+	// Use dial timeout, respecting context deadline if shorter
+	dialTimeout := vsockDialTimeout
+	if deadline, ok := ctx.Deadline(); ok {
+		if remaining := time.Until(deadline); remaining < dialTimeout {
+			dialTimeout = remaining
+		}
+	}
+
+	// Connect to the shim's vsock proxy Unix socket
+	dialer := net.Dialer{Timeout: dialTimeout}
+	conn, err := dialer.DialContext(ctx, "unix", d.socketPath)
+	if err != nil {
+		return nil, fmt.Errorf("dial vsock proxy socket %s: %w", d.socketPath, err)
+	}
+
+	slog.DebugContext(ctx, "connected to vsock proxy, performing handshake", "port", port)
+
+	// Set deadline for handshake
+	if err := conn.SetDeadline(time.Now().Add(vsockHandshakeTimeout)); err != nil {
+		conn.Close()
+		return nil, fmt.Errorf("set handshake deadline: %w", err)
+	}
+
+	// Perform handshake (same protocol as Cloud Hypervisor)
+	handshakeCmd := fmt.Sprintf("CONNECT %d\n", port)
+	if _, err := conn.Write([]byte(handshakeCmd)); err != nil {
+		conn.Close()
+		return nil, fmt.Errorf("send vsock handshake: %w", err)
+	}
+
+	// Read handshake response
+	reader := bufio.NewReader(conn)
+	response, err := reader.ReadString('\n')
+	if err != nil {
+		conn.Close()
+		return nil, fmt.Errorf("read vsock handshake response (is guest-agent running?): %w", err)
+	}
+
+	// Clear deadline after successful handshake
+	if err := conn.SetDeadline(time.Time{}); err != nil {
+		conn.Close()
+		return nil, fmt.Errorf("clear deadline: %w", err)
+	}
+
+	response = strings.TrimSpace(response)
+	if !strings.HasPrefix(response, "OK ") {
+		conn.Close()
+		return nil, fmt.Errorf("vsock handshake failed: %s", response)
+	}
+
+	slog.DebugContext(ctx, "vsock handshake successful", "response", response)
+
+	// Return wrapped connection that uses the bufio.Reader
+	return &bufferedConn{Conn: conn, reader: reader}, nil
+}
+
+// bufferedConn wraps a net.Conn with a bufio.Reader to ensure any buffered
+// data from the handshake is properly drained before reading from the connection.
+type bufferedConn struct {
+	net.Conn
+	reader *bufio.Reader
+}
+
+func (c *bufferedConn) Read(p []byte) (int, error) {
+	return c.reader.Read(p)
+}
diff --git a/lib/hypervisor/vz/vz.entitlements b/lib/hypervisor/vz/vz.entitlements
new file mode 100644
index 00000000..c5004a80
--- /dev/null
+++ b/lib/hypervisor/vz/vz.entitlements
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>com.apple.security.virtualization</key>
+	<true/>
+	<key>com.apple.security.network.server</key>
+	<true/>
+	<key>com.apple.security.network.client</key>
+	<true/>
+</dict>
+</plist>
diff --git a/lib/hypervisor/vz/vz_entitlements.go b/lib/hypervisor/vz/vz_entitlements.go
new file mode 100644
index 00000000..82b8bf47
--- /dev/null
+++ b/lib/hypervisor/vz/vz_entitlements.go
@@ -0,0 +1,8 @@
+//go:build darwin
+
+package vz
+
+import _ "embed"
+
+//go:embed vz.entitlements
+var vzEntitlements []byte
diff --git a/lib/hypervisor/vz/vz_shim_binary.go b/lib/hypervisor/vz/vz_shim_binary.go
new file mode 100644
index 00000000..fd278761
--- /dev/null
+++ b/lib/hypervisor/vz/vz_shim_binary.go
@@ -0,0 +1,11 @@
+//go:build darwin
+
+package vz
+
+import _ "embed"
+
+// vzShimBinary contains the embedded vz-shim binary.
+// Built by the Makefile before the main binary is compiled.
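+// At runtime, extractShim copies this binary to a temp file and ad-hoc
+// codesigns it with the embedded entitlements so that
+// Virtualization.framework will accept it.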
+// +//go:embed vz-shim/vz-shim +var vzShimBinary []byte diff --git a/lib/images/oci_public.go b/lib/images/oci_public.go index a7a7c53f..eb87b0db 100644 --- a/lib/images/oci_public.go +++ b/lib/images/oci_public.go @@ -25,6 +25,11 @@ func (c *OCIClient) InspectManifest(ctx context.Context, imageRef string) (strin return c.client.inspectManifest(ctx, imageRef) } +// InspectManifestForLinux is an alias for InspectManifest (all images target Linux) +func (c *OCIClient) InspectManifestForLinux(ctx context.Context, imageRef string) (string, error) { + return c.InspectManifest(ctx, imageRef) +} + // PullAndUnpack pulls an OCI image and unpacks it to a directory (public for system manager). // Always targets Linux platform since hypeman VMs are Linux guests. func (c *OCIClient) PullAndUnpack(ctx context.Context, imageRef, digest, exportDir string) error { @@ -34,3 +39,8 @@ func (c *OCIClient) PullAndUnpack(ctx context.Context, imageRef, digest, exportD } return nil } + +// PullAndUnpackForLinux is an alias for PullAndUnpack (all images target Linux) +func (c *OCIClient) PullAndUnpackForLinux(ctx context.Context, imageRef, digest, exportDir string) error { + return c.PullAndUnpack(ctx, imageRef, digest, exportDir) +} diff --git a/lib/instances/README.md b/lib/instances/README.md index a2d42172..51a245ef 100644 --- a/lib/instances/README.md +++ b/lib/instances/README.md @@ -1,12 +1,12 @@ # Instance Manager -Manages VM instance lifecycle using Cloud Hypervisor. +Manages VM instance lifecycle across multiple hypervisors (Cloud Hypervisor, QEMU on Linux; vz on macOS). ## Design Decisions ### Why State Machine? (state.go) -**What:** Single-hop state transitions matching Cloud Hypervisor's actual states +**What:** Single-hop state transitions matching hypervisor states **Why:** - Validates transitions before execution (prevents invalid operations) @@ -132,6 +132,6 @@ TestStorageOperations - metadata persistence, directory cleanup - `lib/images` - Image manager for OCI image validation - `lib/system` - System manager for kernel/initrd files -- `lib/vmm` - Cloud Hypervisor client for VM operations -- System tools: `mkfs.erofs`, `cpio`, `gzip` +- `lib/hypervisor` - Hypervisor abstraction for VM operations +- System tools: `mkfs.erofs`, `cpio`, `gzip` (Linux); `mkfs.ext4` (macOS) diff --git a/lib/instances/create.go b/lib/instances/create.go index 33724968..003a984c 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -113,6 +113,11 @@ func (m *manager) createInstance( vsockSocket := m.paths.InstanceVsockSocket(id) log.DebugContext(ctx, "generated vsock config", "instance_id", id, "cid", vsockCID) + // Override vsock socket path for vz (uses Virtio socket, not vhost-user) + if req.Hypervisor == hypervisor.TypeVZ || (req.Hypervisor == "" && m.defaultHypervisor == hypervisor.TypeVZ) { + vsockSocket = filepath.Join(m.paths.InstanceDir(id), "vz.vsock") + } + // 5. Check instance doesn't already exist if _, err := m.loadMetadata(id); err == nil { return nil, ErrAlreadyExists @@ -712,10 +717,19 @@ func (m *manager) buildHypervisorConfig(ctx context.Context, inst *Instance, ima PCIDevices: pciDevices, KernelPath: kernelPath, InitrdPath: initrdPath, - KernelArgs: "console=ttyS0", + KernelArgs: m.kernelArgs(inst.HypervisorType), }, nil } +// kernelArgs returns the kernel command line arguments for the given hypervisor type. +// vz uses hvc0 (virtio console), all others use ttyS0 (serial port). 
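+// Virtualization.framework exposes the guest console as a virtio console
+// device (hvc0) rather than an emulated 16550 UART, so the console= kernel
+// argument must differ per hypervisor.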
+func (m *manager) kernelArgs(hvType hypervisor.Type) string { + if hvType == hypervisor.TypeVZ { + return "console=hvc0" + } + return "console=ttyS0" +} + func ptr[T any](v T) *T { return &v } diff --git a/lib/instances/manager_darwin_test.go b/lib/instances/manager_darwin_test.go new file mode 100644 index 00000000..9fde885a --- /dev/null +++ b/lib/instances/manager_darwin_test.go @@ -0,0 +1,395 @@ +//go:build darwin + +package instances + +import ( + "bytes" + "context" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "syscall" + "testing" + "time" + + "github.com/kernel/hypeman/cmd/api/config" + "github.com/kernel/hypeman/lib/devices" + "github.com/kernel/hypeman/lib/guest" + "github.com/kernel/hypeman/lib/hypervisor" + "github.com/kernel/hypeman/lib/images" + "github.com/kernel/hypeman/lib/network" + "github.com/kernel/hypeman/lib/paths" + "github.com/kernel/hypeman/lib/resources" + "github.com/kernel/hypeman/lib/system" + "github.com/kernel/hypeman/lib/volumes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// setupVZTestManager creates a test manager with a short temp directory path. +// macOS has a 104-byte limit on Unix socket paths, and t.TempDir() creates paths +// under /var/folders/... which are too long for the nested socket paths used by vz-shim. +func setupVZTestManager(t *testing.T) (*manager, string) { + tmpDir, err := os.MkdirTemp("/tmp", "vz-") + require.NoError(t, err) + t.Cleanup(func() { os.RemoveAll(tmpDir) }) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + p := paths.New(tmpDir) + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemManager := system.NewManager(p) + networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) + volumeManager := volumes.NewManager(p, 0, nil) + limits := ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 0, + MaxMemoryPerInstance: 0, + } + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, "", nil, nil).(*manager) + + resourceMgr := resources.NewManager(cfg, p) + resourceMgr.SetInstanceLister(mgr) + resourceMgr.SetImageLister(imageManager) + resourceMgr.SetVolumeLister(volumeManager) + err = resourceMgr.Initialize(context.Background()) + require.NoError(t, err) + mgr.SetResourceValidator(resourceMgr) + + t.Cleanup(func() { + cleanupOrphanedProcesses(t, mgr) + }) + + return mgr, tmpDir +} + +// vzExecCommand runs a command in the guest via vsock exec. +func vzExecCommand(ctx context.Context, inst *Instance, command ...string) (string, int, error) { + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + if err != nil { + return "", -1, err + } + + var stdout, stderr bytes.Buffer + exit, err := guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ + Command: command, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + if err != nil { + return stderr.String(), -1, err + } + + output := stdout.String() + if stderr.Len() > 0 { + output += "\nSTDERR: " + stderr.String() + } + return output, exit.Code, nil +} + +// TestVZBasicLifecycle tests the full vz instance lifecycle: create, exec, stop, start, delete. 
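+// It pulls docker.io/library/alpine:latest over the network and boots a real
+// VM through vz-shim, so it is an integration test rather than a unit test.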
+func TestVZBasicLifecycle(t *testing.T) { + if runtime.GOOS != "darwin" { + t.Skip("vz tests require macOS") + } + + mgr, tmpDir := setupVZTestManager(t) + ctx := context.Background() + p := paths.New(tmpDir) + + // Prepare image + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + t.Log("Pulling alpine:latest image...") + alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/alpine:latest", + }) + require.NoError(t, err) + + imageName := alpineImage.Name + for i := 0; i < 60; i++ { + img, err := imageManager.GetImage(ctx, imageName) + if err == nil && img.Status == images.StatusReady { + alpineImage = img + break + } + if err == nil && img.Status == images.StatusFailed { + t.Fatalf("Image build failed: %s", *img.Error) + } + time.Sleep(1 * time.Second) + } + require.Equal(t, images.StatusReady, alpineImage.Status, "Image should be ready") + t.Log("Alpine image ready") + + // Ensure system files (kernel + initrd) + systemManager := system.NewManager(p) + err = systemManager.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Create instance using vz hypervisor + inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ + Name: "test-vz-lifecycle", + Image: "docker.io/library/alpine:latest", + Size: 2 * 1024 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Hypervisor: hypervisor.TypeVZ, + Cmd: []string{"sleep", "infinity"}, + Env: map[string]string{"TEST_VAR": "hello"}, + }) + if err != nil { + dumpVZShimLogs(t, tmpDir) + require.NoError(t, err) + } + require.NotNil(t, inst) + assert.Equal(t, StateRunning, inst.State) + assert.Equal(t, hypervisor.TypeVZ, inst.HypervisorType) + t.Logf("Instance created: %s (hypervisor: %s)", inst.Id, inst.HypervisorType) + + t.Cleanup(func() { + t.Log("Cleaning up instance...") + mgr.DeleteInstance(ctx, inst.Id) + }) + + // Wait for guest agent to be ready + err = waitForExecAgent(ctx, mgr, inst.Id, 30*time.Second) + require.NoError(t, err, "guest agent should be ready") + t.Log("Guest agent ready") + + // Exec test: echo hello + output, exitCode, err := vzExecCommand(ctx, inst, "echo", "hello") + require.NoError(t, err, "exec should succeed") + require.Equal(t, 0, exitCode) + assert.Equal(t, "hello", strings.TrimSpace(output)) + t.Log("Exec test passed") + + // Graceful shutdown test + t.Log("Stopping instance (graceful shutdown)...") + inst, err = mgr.StopInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStopped, inst.State) + t.Log("Instance stopped") + + // Verify hypervisor process is gone + oldPID := inst.HypervisorPID + if oldPID != nil { + time.Sleep(500 * time.Millisecond) + err := checkProcessGone(*oldPID) + assert.NoError(t, err, "hypervisor process should be gone after stop") + } + + // Restart test + t.Log("Starting instance (restart after stop)...") + inst, err = mgr.StartInstance(ctx, inst.Id, StartInstanceRequest{}) + require.NoError(t, err) + assert.Equal(t, StateRunning, inst.State) + t.Logf("Instance restarted: %s (pid: %v)", inst.Id, inst.HypervisorPID) + + // Re-read instance to get updated vsock info + inst, err = mgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Wait for exec to actually work after restart + // (can't rely on waitForExecAgent - logs from first boot still contain the marker) + t.Log("Waiting for exec to work after restart...") + var execErr error + for i := 0; i < 30; i++ { + time.Sleep(1 * time.Second) + // Re-read instance 
each time in case vsock info updates + inst, err = mgr.GetInstance(ctx, inst.Id) + if err != nil { + continue + } + output, exitCode, execErr = vzExecCommand(ctx, inst, "echo", "after-restart") + if execErr == nil && exitCode == 0 { + break + } + t.Logf("Exec attempt %d: err=%v", i+1, execErr) + } + if execErr != nil { + dumpVZShimLogs(t, tmpDir) + // Dump ALL log files + allLogs, _ := filepath.Glob(filepath.Join(tmpDir, "guests", "*", "logs", "*")) + for _, logFile := range allLogs { + content, err := os.ReadFile(logFile) + if err == nil && len(content) > 0 { + if len(content) > 4000 { + content = content[len(content)-4000:] + } + t.Logf("log file (%s):\n%s", logFile, string(content)) + } else if err == nil { + t.Logf("log file (%s): EMPTY", logFile) + } + } + // Check if vz-shim is still running + if inst.HypervisorPID != nil { + err := checkProcessGone(*inst.HypervisorPID) + if err != nil { + t.Logf("vz-shim process %d is still running", *inst.HypervisorPID) + } else { + t.Logf("vz-shim process %d is GONE (crashed?)", *inst.HypervisorPID) + } + } + } + require.NoError(t, execErr, "exec should succeed after restart") + require.Equal(t, 0, exitCode) + assert.Equal(t, "after-restart", strings.TrimSpace(output)) + t.Log("Exec after restart passed") + + // Stop again before delete + t.Log("Stopping instance before delete...") + inst, err = mgr.StopInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStopped, inst.State) + + // Delete test + t.Log("Deleting instance...") + err = mgr.DeleteInstance(ctx, inst.Id) + require.NoError(t, err) + + assert.NoDirExists(t, p.InstanceDir(inst.Id)) + _, err = mgr.GetInstance(ctx, inst.Id) + assert.ErrorIs(t, err, ErrNotFound) + t.Log("Instance deleted and cleaned up") +} + +// TestVZExecAndShutdown focuses on exec behavior and graceful shutdown. 
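+// In particular it verifies that exec fails fast, rather than hanging, when
+// the requested binary does not exist in the guest.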
+func TestVZExecAndShutdown(t *testing.T) { + if runtime.GOOS != "darwin" { + t.Skip("vz tests require macOS") + } + + mgr, tmpDir := setupVZTestManager(t) + ctx := context.Background() + p := paths.New(tmpDir) + + // Prepare image + imageManager, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + t.Log("Pulling alpine:latest image...") + alpineImage, err := imageManager.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/alpine:latest", + }) + require.NoError(t, err) + + imageName := alpineImage.Name + for i := 0; i < 60; i++ { + img, err := imageManager.GetImage(ctx, imageName) + if err == nil && img.Status == images.StatusReady { + alpineImage = img + break + } + if err == nil && img.Status == images.StatusFailed { + t.Fatalf("Image build failed: %s", *img.Error) + } + time.Sleep(1 * time.Second) + } + require.Equal(t, images.StatusReady, alpineImage.Status, "Image should be ready") + + systemManager := system.NewManager(p) + err = systemManager.EnsureSystemFiles(ctx) + require.NoError(t, err) + + inst, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ + Name: "test-vz-exec", + Image: "docker.io/library/alpine:latest", + Size: 2 * 1024 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Hypervisor: hypervisor.TypeVZ, + Cmd: []string{"sleep", "infinity"}, + }) + if err != nil { + dumpVZShimLogs(t, tmpDir) + require.NoError(t, err) + } + assert.Equal(t, StateRunning, inst.State) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + mgr.DeleteInstance(ctx, inst.Id) + }) + + err = waitForExecAgent(ctx, mgr, inst.Id, 30*time.Second) + require.NoError(t, err, "guest agent should be ready") + + // Test: echo hello + output, exitCode, err := vzExecCommand(ctx, inst, "echo", "hello") + require.NoError(t, err) + require.Equal(t, 0, exitCode) + assert.Equal(t, "hello", strings.TrimSpace(output)) + t.Log("echo test passed") + + // Test: nonexistent command should error, not hang + dialer, err := hypervisor.NewVsockDialer(inst.HypervisorType, inst.VsockSocket, inst.VsockCID) + require.NoError(t, err) + + start := time.Now() + var stdout, stderr strings.Builder + _, err = guest.ExecIntoInstance(ctx, dialer, guest.ExecOptions{ + Command: []string{"nonexistent_command_xyz"}, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + elapsed := time.Since(start) + require.Error(t, err, "exec should fail for nonexistent command") + require.Less(t, elapsed, 5*time.Second, "exec should not hang") + t.Logf("Nonexistent command failed correctly in %v", elapsed) + + // Graceful shutdown + t.Log("Stopping instance...") + inst, err = mgr.StopInstance(ctx, inst.Id) + require.NoError(t, err) + assert.Equal(t, StateStopped, inst.State) + t.Log("Instance stopped gracefully") + + // Delete + err = mgr.DeleteInstance(ctx, inst.Id) + require.NoError(t, err) + _, err = mgr.GetInstance(ctx, inst.Id) + assert.ErrorIs(t, err, ErrNotFound) + t.Log("Instance deleted") +} + +// dumpVZShimLogs logs any vz-shim log files found under tmpDir for debugging CI failures. +func dumpVZShimLogs(t *testing.T, tmpDir string) { + t.Helper() + logFiles, _ := filepath.Glob(filepath.Join(tmpDir, "guests", "*", "logs", "vz-shim.log")) + for _, logFile := range logFiles { + content, err := os.ReadFile(logFile) + if err == nil && len(content) > 0 { + t.Logf("vz-shim log (%s):\n%s", logFile, string(content)) + } + } +} + +// checkProcessGone verifies a process no longer exists. 
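+// Sending signal 0 performs kill(2)'s existence check without delivering a
+// signal, so an error here is treated as the process having exited.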
+func checkProcessGone(pid int) error { + proc, err := os.FindProcess(pid) + if err != nil { + return nil + } + err = proc.Signal(syscall.Signal(0)) + if err != nil { + return nil // Process doesn't exist + } + return fmt.Errorf("process %d still running", pid) +} diff --git a/lib/network/README.md b/lib/network/README.md index 1e771532..c54e66a8 100644 --- a/lib/network/README.md +++ b/lib/network/README.md @@ -1,6 +1,21 @@ # Network Manager -Manages the default virtual network for instances using a Linux bridge and TAP devices. +Manages the default virtual network for instances. + +## Platform Support + +| Platform | Network Model | Implementation | +|----------|---------------|----------------| +| Linux | Bridge + TAP | Linux bridge with TAP devices per VM, iptables NAT | +| macOS | NAT | Virtualization.framework built-in NAT (192.168.64.0/24) | + +On macOS, the network manager skips bridge/TAP creation since vz provides NAT networking automatically. + +--- + +## Linux Networking + +On Linux, hypeman manages a virtual network using a Linux bridge and TAP devices. ## How Linux VM Networking Works diff --git a/lib/system/init/mount.go b/lib/system/init/mount.go index 9259cc77..faf65d4d 100644 --- a/lib/system/init/mount.go +++ b/lib/system/init/mount.go @@ -224,9 +224,8 @@ func redirectToConsole(device string) { } // copyGuestAgent copies the guest-agent binary to the target location in the new root. -// It skips copying if: -// - skipGuestAgent config option is true -// - The destination file already exists (lazy copy optimization) +// Always copies from initrd to ensure the binary matches the current version and is not +// corrupted (e.g., after a force-kill that left the overlay in a dirty state). func copyGuestAgent(log *Logger, skipGuestAgent bool) error { const ( src = "/usr/local/bin/guest-agent" @@ -239,12 +238,6 @@ func copyGuestAgent(log *Logger, skipGuestAgent bool) error { return nil } - // Check if destination already exists (lazy copy - skip if already present) - if _, err := os.Stat(dst); err == nil { - log.Info("hypeman-init:agent", "guest-agent already exists, skipping copy") - return nil - } - // Create target directory if err := os.MkdirAll("/overlay/newroot/opt/hypeman", 0755); err != nil { return fmt.Errorf("mkdir: %w", err) @@ -256,7 +249,7 @@ func copyGuestAgent(log *Logger, skipGuestAgent bool) error { return fmt.Errorf("read source: %w", err) } - // Write to destination + // Write to destination (always overwrite to ensure correct binary after restarts) if err := os.WriteFile(dst, data, 0755); err != nil { return fmt.Errorf("write destination: %w", err) } diff --git a/lib/system/initrd.go b/lib/system/initrd.go index 22f64713..3ef4c103 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -35,14 +35,14 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) return "", fmt.Errorf("create oci client: %w", err) } - // Inspect Alpine base to get digest - digest, err := ociClient.InspectManifest(ctx, alpineBaseImage) + // Inspect Alpine base to get digest (always use Linux platform since this is for guest VMs) + digest, err := ociClient.InspectManifestForLinux(ctx, alpineBaseImage) if err != nil { return "", fmt.Errorf("inspect alpine manifest: %w", err) } - // Pull and unpack Alpine base - if err := ociClient.PullAndUnpack(ctx, alpineBaseImage, digest, rootfsDir); err != nil { + // Pull and unpack Alpine base (always use Linux platform since this is for guest VMs) + if err := ociClient.PullAndUnpackForLinux(ctx, alpineBaseImage, 
digest, rootfsDir); err != nil { return "", fmt.Errorf("pull alpine base: %w", err) } diff --git a/scripts/e2e-install-test.sh b/scripts/e2e-install-test.sh new file mode 100755 index 00000000..cf7f1324 --- /dev/null +++ b/scripts/e2e-install-test.sh @@ -0,0 +1,219 @@ +#!/bin/bash +# +# Hypeman E2E Install Test +# +# Runs a full install → verify → uninstall cycle. +# Platform-agnostic: works on both Linux and macOS. +# + +set -e + +# Colors +RED='\033[38;2;255;110;110m' +GREEN='\033[38;2;92;190;83m' +YELLOW='\033[0;33m' +NC='\033[0m' + +info() { echo -e "${GREEN}[INFO]${NC} $1"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; } +pass() { echo -e "${GREEN}[PASS]${NC} $1"; } +fail() { echo -e "${RED}[FAIL]${NC} $1"; exit 1; } + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +OS=$(uname -s | tr '[:upper:]' '[:lower:]') + +cd "$REPO_DIR" + +# ============================================================================= +# Phase 1: Clean slate +# ============================================================================= +info "Phase 1: Cleaning previous installation..." +KEEP_DATA=false bash scripts/uninstall.sh 2>/dev/null || true + +# ============================================================================= +# Phase 2: Install from source +# ============================================================================= +info "Phase 2: Installing from source..." +BRANCH=$(git rev-parse --abbrev-ref HEAD) +BRANCH="$BRANCH" bash scripts/install.sh + +# ============================================================================= +# Phase 3: Wait for service +# ============================================================================= +info "Phase 3: Waiting for service to be healthy..." + +PORT=8080 +TIMEOUT=60 +ELAPSED=0 + +while [ $ELAPSED -lt $TIMEOUT ]; do + if curl -sf "http://localhost:${PORT}/health" >/dev/null 2>&1; then + pass "Service is responding on port ${PORT}" + break + fi + sleep 2 + ELAPSED=$((ELAPSED + 2)) +done + +if [ $ELAPSED -ge $TIMEOUT ]; then + # Dump logs for debugging + if [ "$OS" = "darwin" ]; then + LOG_FILE="$HOME/Library/Application Support/hypeman/logs/hypeman.log" + if [ -f "$LOG_FILE" ]; then + warn "Service logs (last 50 lines):" + tail -50 "$LOG_FILE" || true + else + warn "No log file found at $LOG_FILE" + fi + warn "launchctl list:" + launchctl list | grep hypeman || true + fi + fail "Service did not become healthy within ${TIMEOUT}s" +fi + +# ============================================================================= +# Phase 4: Validate installation +# ============================================================================= +info "Phase 4: Validating installation..." 
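+# Validation is OS-specific: macOS installs to /usr/local/bin and runs under
+# launchd, while Linux installs to /opt/hypeman/bin and runs under systemd.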
+ +# Check binaries +if [ "$OS" = "darwin" ]; then + [ -x /usr/local/bin/hypeman-api ] || fail "hypeman-api binary not found" + pass "Binaries installed correctly" + + # Check launchd service + if launchctl list | grep -q com.kernel.hypeman; then + pass "launchd service is loaded" + else + fail "launchd service not loaded" + fi +else + [ -x /opt/hypeman/bin/hypeman-api ] || fail "hypeman-api binary not found" + pass "Binaries installed correctly" + + # Check systemd service + if systemctl is-active --quiet hypeman; then + pass "systemd service is running" + else + fail "systemd service not running" + fi +fi + +# Check config +if [ "$OS" = "darwin" ]; then + [ -f "$HOME/.config/hypeman/config" ] || fail "Config file not found" +else + [ -f /etc/hypeman/config ] || fail "Config file not found" +fi +pass "Config file exists" + +# ============================================================================= +# Phase 4b: Testing CLI commands +# ============================================================================= +info "Phase 4b: Testing CLI commands..." + +# Determine config file path +if [ "$OS" = "darwin" ]; then + CONFIG_FILE="$HOME/.config/hypeman/config" +else + CONFIG_FILE="/etc/hypeman/config" +fi + +# Extract JWT_SECRET and PORT from config (source is unsafe — values may contain spaces) +JWT_SECRET=$(grep '^JWT_SECRET=' "$CONFIG_FILE" | cut -d= -f2-) +PORT=$(grep '^PORT=' "$CONFIG_FILE" | cut -d= -f2-) +export JWT_SECRET + +# Generate API token using hypeman-token +if [ "$OS" = "darwin" ]; then + API_KEY=$("/usr/local/bin/hypeman-token" -user-id "e2e-test-user") +else + API_KEY=$("/opt/hypeman/bin/hypeman-token" -user-id "e2e-test-user") +fi +[ -n "$API_KEY" ] || fail "Failed to generate API token" +pass "Generated API token" + +# Set CLI env +export HYPEMAN_API_KEY="$API_KEY" +export HYPEMAN_BASE_URL="http://localhost:${PORT:-8080}" + +# Determine CLI path +HYPEMAN_CMD="/usr/local/bin/hypeman" + +# Verify CLI was installed +[ -x "$HYPEMAN_CMD" ] || fail "hypeman CLI not found at $HYPEMAN_CMD" +pass "CLI installed" + +$HYPEMAN_CMD ps || fail "hypeman ps failed" +pass "hypeman ps works" + +# VM lifecycle test +E2E_VM_NAME="e2e-test-vm" + +$HYPEMAN_CMD pull nginx:alpine || fail "hypeman pull failed" +pass "hypeman pull works" + +# Wait for image to be available (pull is async) +IMAGE_READY=false +for i in $(seq 1 30); do + if $HYPEMAN_CMD run --name "$E2E_VM_NAME" nginx:alpine 2>&1; then + IMAGE_READY=true + break + fi + sleep 2 +done +[ "$IMAGE_READY" = true ] || fail "hypeman run failed (image not ready after 60s)" +pass "hypeman run works" + +# Wait for VM to be ready +VM_READY=false +for i in $(seq 1 30); do + if $HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello" >/dev/null 2>&1; then + VM_READY=true + break + fi + sleep 2 +done +[ "$VM_READY" = true ] || fail "VM did not become ready within 60s" + +OUTPUT=$($HYPEMAN_CMD exec "$E2E_VM_NAME" -- echo "hello from e2e") || fail "hypeman exec failed" +echo "$OUTPUT" | grep -q "hello from e2e" || fail "hypeman exec output mismatch: $OUTPUT" +pass "hypeman exec works" + +$HYPEMAN_CMD stop "$E2E_VM_NAME" || fail "hypeman stop failed" +pass "hypeman stop works" + +$HYPEMAN_CMD rm "$E2E_VM_NAME" || fail "hypeman rm failed" +pass "hypeman rm works" + +# ============================================================================= +# Phase 5: Cleanup +# ============================================================================= +info "Phase 5: Cleaning up..." 
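+# Same flag as Phase 1: KEEP_DATA=false asks uninstall.sh to remove the data
+# directory along with the binaries and service.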
+KEEP_DATA=false bash scripts/uninstall.sh + +# ============================================================================= +# Phase 6: Verify cleanup +# ============================================================================= +info "Phase 6: Verifying cleanup..." + +if [ "$OS" = "darwin" ]; then + [ ! -f /usr/local/bin/hypeman-api ] || fail "hypeman-api binary still exists after uninstall" + if launchctl list 2>/dev/null | grep -q com.kernel.hypeman; then + fail "launchd service still loaded after uninstall" + fi +else + [ ! -f /opt/hypeman/bin/hypeman-api ] || fail "hypeman-api binary still exists after uninstall" + if systemctl is-active --quiet hypeman 2>/dev/null; then + fail "systemd service still running after uninstall" + fi +fi +pass "Cleanup verified" + +# ============================================================================= +# Done +# ============================================================================= +echo "" +info "All E2E install tests passed!" diff --git a/scripts/install.sh b/scripts/install.sh index 063241f4..367b6db5 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -10,20 +10,15 @@ # CLI_VERSION - Install specific CLI version (default: latest) # BRANCH - Build from source using this branch (for development/testing) # BINARY_DIR - Use binaries from this directory instead of building/downloading -# INSTALL_DIR - Binary installation directory (default: /opt/hypeman/bin) -# DATA_DIR - Data directory (default: /var/lib/hypeman) -# CONFIG_DIR - Config directory (default: /etc/hypeman) +# INSTALL_DIR - Binary installation directory (default: /opt/hypeman/bin on Linux, /usr/local/bin on macOS) +# DATA_DIR - Data directory (default: /var/lib/hypeman on Linux, ~/Library/Application Support/hypeman on macOS) +# CONFIG_DIR - Config directory (default: /etc/hypeman on Linux, ~/.config/hypeman on macOS) # set -e REPO="kernel/hypeman" BINARY_NAME="hypeman-api" -INSTALL_DIR="${INSTALL_DIR:-/opt/hypeman/bin}" -DATA_DIR="${DATA_DIR:-/var/lib/hypeman}" -CONFIG_DIR="${CONFIG_DIR:-/etc/hypeman}" -CONFIG_FILE="${CONFIG_DIR}/config" -SYSTEMD_DIR="/etc/systemd/system" SERVICE_NAME="hypeman" # Colors for output (true color) @@ -38,64 +33,123 @@ warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; } # Find the most recent release that has a specific artifact available -# Usage: find_release_with_artifact +# Usage: find_release_with_artifact [ext] # Returns: version tag (e.g., v0.5.0) or empty string if not found find_release_with_artifact() { local repo="$1" local archive_prefix="$2" local os="$3" local arch="$4" - + local ext="${5:-tar.gz}" + # Fetch recent release tags (up to 10) local tags tags=$(curl -fsSL "https://api.github.com/repos/${repo}/releases?per_page=10" 2>/dev/null | grep '"tag_name"' | cut -d'"' -f4) if [ -z "$tags" ]; then return 1 fi - + # Check each release for the artifact for tag in $tags; do local version_num="${tag#v}" - local artifact_name="${archive_prefix}_${version_num}_${os}_${arch}.tar.gz" + local artifact_name="${archive_prefix}_${version_num}_${os}_${arch}.${ext}" local artifact_url="https://github.com/${repo}/releases/download/${tag}/${artifact_name}" - + # Check if artifact exists (follow redirects, fail silently) if curl -fsSL --head "$artifact_url" >/dev/null 2>&1; then echo "$tag" return 0 fi done - + return 1 } +# ============================================================================= +# Detect OS and architecture (before pre-flight checks) +# 
============================================================================= + +OS=$(uname -s | tr '[:upper:]' '[:lower:]') +ARCH=$(uname -m) +case $ARCH in + x86_64|amd64) + ARCH="amd64" + ;; + aarch64|arm64) + ARCH="arm64" + ;; + *) + error "Unsupported architecture: $ARCH (supported: amd64, arm64)" + ;; +esac + +if [ "$OS" != "linux" ] && [ "$OS" != "darwin" ]; then + error "Unsupported OS: $OS (supported: linux, darwin)" +fi + +# ============================================================================= +# OS-conditional defaults +# ============================================================================= + +if [ "$OS" = "darwin" ]; then + INSTALL_DIR="${INSTALL_DIR:-/usr/local/bin}" + DATA_DIR="${DATA_DIR:-$HOME/Library/Application Support/hypeman}" + CONFIG_DIR="${CONFIG_DIR:-$HOME/.config/hypeman}" +else + INSTALL_DIR="${INSTALL_DIR:-/opt/hypeman/bin}" + DATA_DIR="${DATA_DIR:-/var/lib/hypeman}" + CONFIG_DIR="${CONFIG_DIR:-/etc/hypeman}" +fi + +CONFIG_FILE="${CONFIG_DIR}/config" +SYSTEMD_DIR="/etc/systemd/system" + # ============================================================================= # Pre-flight checks - verify all requirements before doing anything # ============================================================================= info "Running pre-flight checks..." -# Check for root or sudo access SUDO="" -if [ "$EUID" -ne 0 ]; then - if ! command -v sudo >/dev/null 2>&1; then - error "This script requires root privileges. Please run as root or install sudo." +if [ "$OS" = "darwin" ]; then + # macOS pre-flight + if [ "$ARCH" != "arm64" ]; then + error "Intel Macs not supported" fi - # Try passwordless sudo first, then prompt from terminal if needed - if ! sudo -n true 2>/dev/null; then - info "Requesting sudo privileges..." - # Read password from /dev/tty (terminal) even when script is piped - if ! sudo -v < /dev/tty; then - error "Failed to obtain sudo privileges" + command -v codesign >/dev/null 2>&1 || error "codesign is required but not installed (install Xcode Command Line Tools)" + command -v docker >/dev/null 2>&1 || error "Docker CLI is required but not found. Install Docker via Colima or Docker Desktop." + # Check if we need sudo for INSTALL_DIR + if [ ! -w "$INSTALL_DIR" ] 2>/dev/null && [ ! -w "$(dirname "$INSTALL_DIR")" ] 2>/dev/null; then + if command -v sudo >/dev/null 2>&1; then + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges (needed for $INSTALL_DIR)..." + if ! sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi + fi + SUDO="sudo" + fi + fi +else + # Linux pre-flight + if [ "$EUID" -ne 0 ]; then + if ! command -v sudo >/dev/null 2>&1; then + error "This script requires root privileges. Please run as root or install sudo." + fi + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges..." + if ! 
sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi fi + SUDO="sudo" fi - SUDO="sudo" + command -v systemctl >/dev/null 2>&1 || error "systemctl is required but not installed (systemd not available?)" fi -# Check for required commands +# Common checks command -v curl >/dev/null 2>&1 || error "curl is required but not installed" command -v tar >/dev/null 2>&1 || error "tar is required but not installed" -command -v systemctl >/dev/null 2>&1 || error "systemctl is required but not installed (systemd not available?)" command -v openssl >/dev/null 2>&1 || error "openssl is required but not installed" # Count how many of BRANCH, VERSION, BINARY_DIR are set @@ -122,73 +176,56 @@ if [ -n "$BINARY_DIR" ]; then fi fi -# Detect OS -OS=$(uname -s | tr '[:upper:]' '[:lower:]') -if [ "$OS" != "linux" ]; then - error "Hypeman only supports Linux (detected: $OS)" -fi - -# Detect architecture -ARCH=$(uname -m) -case $ARCH in - x86_64|amd64) - ARCH="amd64" - ;; - aarch64|arm64) - ARCH="arm64" - ;; - *) - error "Unsupported architecture: $ARCH (supported: amd64, arm64)" - ;; -esac - info "Pre-flight checks passed" # ============================================================================= # System Configuration - KVM access and network capabilities # ============================================================================= -# Get the installing user (for adding to groups) INSTALL_USER="${SUDO_USER:-$(whoami)}" -# Ensure KVM access -if [ -e /dev/kvm ]; then - if getent group kvm &>/dev/null; then - if ! groups "$INSTALL_USER" 2>/dev/null | grep -qw kvm; then - info "Adding user ${INSTALL_USER} to kvm group..." - $SUDO usermod -aG kvm "$INSTALL_USER" - warn "You may need to log out and back in for kvm group membership to take effect" +if [ "$OS" = "darwin" ]; then + info "macOS uses NAT networking via Virtualization.framework, no system config needed" +else + # Ensure KVM access + if [ -e /dev/kvm ]; then + if getent group kvm &>/dev/null; then + if ! groups "$INSTALL_USER" 2>/dev/null | grep -qw kvm; then + info "Adding user ${INSTALL_USER} to kvm group..." + $SUDO usermod -aG kvm "$INSTALL_USER" + warn "You may need to log out and back in for kvm group membership to take effect" + fi fi + else + warn "/dev/kvm not found - KVM may not be available on this system" fi -else - warn "/dev/kvm not found - KVM may not be available on this system" -fi -# Enable IPv4 forwarding (required for VM networking) -CURRENT_IP_FORWARD=$(sysctl -n net.ipv4.ip_forward 2>/dev/null || echo "0") -if [ "$CURRENT_IP_FORWARD" != "1" ]; then - info "Enabling IPv4 forwarding..." - $SUDO sysctl -w net.ipv4.ip_forward=1 > /dev/null - - # Make it persistent across reboots - if [ -d /etc/sysctl.d ]; then - echo 'net.ipv4.ip_forward=1' | $SUDO tee /etc/sysctl.d/99-hypeman.conf > /dev/null - elif ! grep -q '^net.ipv4.ip_forward=1' /etc/sysctl.conf 2>/dev/null; then - echo 'net.ipv4.ip_forward=1' | $SUDO tee -a /etc/sysctl.conf > /dev/null + # Enable IPv4 forwarding (required for VM networking) + CURRENT_IP_FORWARD=$(sysctl -n net.ipv4.ip_forward 2>/dev/null || echo "0") + if [ "$CURRENT_IP_FORWARD" != "1" ]; then + info "Enabling IPv4 forwarding..." + $SUDO sysctl -w net.ipv4.ip_forward=1 > /dev/null + + # Make it persistent across reboots + if [ -d /etc/sysctl.d ]; then + echo 'net.ipv4.ip_forward=1' | $SUDO tee /etc/sysctl.d/99-hypeman.conf > /dev/null + elif ! 
grep -q '^net.ipv4.ip_forward=1' /etc/sysctl.conf 2>/dev/null; then + echo 'net.ipv4.ip_forward=1' | $SUDO tee -a /etc/sysctl.conf > /dev/null + fi fi -fi -# Increase file descriptor limit for Caddy (ingress) -if [ -d /etc/security/limits.d ]; then - if [ ! -f /etc/security/limits.d/99-hypeman.conf ]; then - info "Configuring file descriptor limits for ingress..." - $SUDO tee /etc/security/limits.d/99-hypeman.conf > /dev/null << 'LIMITS' + # Increase file descriptor limit for Caddy (ingress) + if [ -d /etc/security/limits.d ]; then + if [ ! -f /etc/security/limits.d/99-hypeman.conf ]; then + info "Configuring file descriptor limits for ingress..." + $SUDO tee /etc/security/limits.d/99-hypeman.conf > /dev/null << 'LIMITS' # Hypeman: Increased file descriptor limits for Caddy ingress * soft nofile 65536 * hard nofile 65536 root soft nofile 65536 root hard nofile 65536 LIMITS + fi fi fi @@ -210,13 +247,20 @@ if [ -n "$BINARY_DIR" ]; then # Copy binaries to TMP_DIR info "Copying binaries from ${BINARY_DIR}..." - for f in "${BINARY_NAME}" "hypeman-token" ".env.example"; do - [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" - done + if [ "$OS" = "darwin" ]; then + for f in "${BINARY_NAME}" "hypeman-token" ".env.darwin.example"; do + [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" + done + cp "${BINARY_DIR}/.env.darwin.example" "${TMP_DIR}/.env.darwin.example" + else + for f in "${BINARY_NAME}" "hypeman-token" ".env.example"; do + [ -f "${BINARY_DIR}/${f}" ] || error "File ${f} not found in ${BINARY_DIR}" + done + cp "${BINARY_DIR}/.env.example" "${TMP_DIR}/.env.example" + fi cp "${BINARY_DIR}/${BINARY_NAME}" "${TMP_DIR}/${BINARY_NAME}" cp "${BINARY_DIR}/hypeman-token" "${TMP_DIR}/hypeman-token" - cp "${BINARY_DIR}/.env.example" "${TMP_DIR}/.env.example" # Make binaries executable chmod +x "${TMP_DIR}/${BINARY_NAME}" @@ -226,27 +270,37 @@ if [ -n "$BINARY_DIR" ]; then elif [ -n "$BRANCH" ]; then # Build from source mode info "Building from source (branch: $BRANCH)..." - + BUILD_DIR="${TMP_DIR}/hypeman" BUILD_LOG="${TMP_DIR}/build.log" - + # Clone repo (quiet) if ! git clone --branch "$BRANCH" --depth 1 -q "https://github.com/${REPO}.git" "$BUILD_DIR" 2>&1 | tee -a "$BUILD_LOG"; then error "Failed to clone repository. Build log:\n$(cat "$BUILD_LOG")" fi - + info "Building binaries (this may take a few minutes)..." cd "$BUILD_DIR" - - # Build main binary (includes dependencies) - capture output, show on error + if ! make build >> "$BUILD_LOG" 2>&1; then echo "" echo -e "${RED}Build failed. Full build log:${NC}" cat "$BUILD_LOG" error "Build failed" fi + if [ "$OS" = "darwin" ]; then + if ! make sign-darwin >> "$BUILD_LOG" 2>&1; then + echo "" + echo -e "${RED}Signing failed. Full build log:${NC}" + cat "$BUILD_LOG" + error "Signing failed" + fi + cp ".env.darwin.example" "${TMP_DIR}/.env.darwin.example" + else + cp ".env.example" "${TMP_DIR}/.env.example" + fi cp "bin/hypeman" "${TMP_DIR}/${BINARY_NAME}" - + # Build hypeman-token (not included in make build) if ! go build -o "${TMP_DIR}/hypeman-token" ./cmd/gen-jwt >> "$BUILD_LOG" 2>&1; then echo "" @@ -254,13 +308,10 @@ elif [ -n "$BRANCH" ]; then cat "$BUILD_LOG" error "Failed to build hypeman-token" fi - - # Copy .env.example for config template - cp ".env.example" "${TMP_DIR}/.env.example" - + VERSION="$BRANCH (source)" cd - > /dev/null - + info "Build complete" else # Download release mode @@ -285,15 +336,47 @@ else info "Extracting..." 
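+# The downloaded archive follows the <prefix>_<version>_<os>_<arch>.tar.gz
+# naming probed by find_release_with_artifact above.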
 tar -xzf "${TMP_DIR}/${ARCHIVE_NAME}" -C "$TMP_DIR"
+
+    # On macOS, codesign after extraction with virtualization entitlements
+    if [ "$OS" = "darwin" ]; then
+        info "Signing binaries..."
+        ENTITLEMENTS_TMP="${TMP_DIR}/vz.entitlements"
+        cat > "$ENTITLEMENTS_TMP" << 'ENTITLEMENTS'
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>com.apple.security.virtualization</key>
+	<true/>
+	<key>com.apple.security.network.server</key>
+	<true/>
+	<key>com.apple.security.network.client</key>
+	<true/>
+</dict>
+</plist>
+ENTITLEMENTS
+        if ! codesign --force --sign - --entitlements "$ENTITLEMENTS_TMP" "${TMP_DIR}/${BINARY_NAME}" 2>/dev/null; then
+            warn "codesign failed — vz hypervisor will not work without virtualization entitlement"
+        fi
+        rm -f "$ENTITLEMENTS_TMP"
+    fi
 fi
 
 # =============================================================================
 # Stop existing service if running
 # =============================================================================
-if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
-    info "Stopping existing ${SERVICE_NAME} service..."
-    $SUDO systemctl stop "$SERVICE_NAME"
+if [ "$OS" = "darwin" ]; then
+    PLIST_PATH="$HOME/Library/LaunchAgents/com.kernel.hypeman.plist"
+    if [ -f "$PLIST_PATH" ]; then
+        info "Stopping existing ${SERVICE_NAME} service..."
+        launchctl unload "$PLIST_PATH" 2>/dev/null || true
+    fi
+else
+    if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
+        info "Stopping existing ${SERVICE_NAME} service..."
+        $SUDO systemctl stop "$SERVICE_NAME"
+    fi
 fi
 
 # =============================================================================
@@ -308,70 +391,178 @@ $SUDO install -m 755 "${TMP_DIR}/${BINARY_NAME}" "${INSTALL_DIR}/${BINARY_NAME}"
 info "Installing hypeman-token to ${INSTALL_DIR}..."
 $SUDO install -m 755 "${TMP_DIR}/hypeman-token" "${INSTALL_DIR}/hypeman-token"
 
-# Install wrapper script to /usr/local/bin for easy access
-info "Installing hypeman-token wrapper to /usr/local/bin..."
-$SUDO tee /usr/local/bin/hypeman-token > /dev/null << EOF
+if [ "$OS" = "linux" ]; then
+    # Install wrapper script to /usr/local/bin for easy access
+    info "Installing hypeman-token wrapper to /usr/local/bin..."
+    $SUDO tee /usr/local/bin/hypeman-token > /dev/null << EOF
 #!/bin/bash
-# Wrapper script for hypeman-token that loads config from /etc/hypeman/config
+# Wrapper script for hypeman-token that loads config from ${CONFIG_FILE}
 set -a
 source ${CONFIG_FILE}
 set +a
 exec ${INSTALL_DIR}/hypeman-token "\$@"
 EOF
-$SUDO chmod 755 /usr/local/bin/hypeman-token
+    $SUDO chmod 755 /usr/local/bin/hypeman-token
+fi
 
 # =============================================================================
 # Create directories
 # =============================================================================
 info "Creating data directory at ${DATA_DIR}..."
-$SUDO mkdir -p "$DATA_DIR"
+if [ "$OS" = "darwin" ]; then
+    mkdir -p "$DATA_DIR"
+    mkdir -p "$DATA_DIR/logs"
+else
+    $SUDO mkdir -p "$DATA_DIR"
+fi
 
 info "Creating config directory at ${CONFIG_DIR}..."
-$SUDO mkdir -p "$CONFIG_DIR"
+if [ "$OS" = "darwin" ]; then
+    mkdir -p "$CONFIG_DIR"
+else
+    $SUDO mkdir -p "$CONFIG_DIR"
+fi
 
 # =============================================================================
 # Create config file (if it doesn't exist)
 # =============================================================================
 if [ ! -f "$CONFIG_FILE" ]; then
-    # Get config template (from local build or download from repo)
-    if [ -f "${TMP_DIR}/.env.example" ]; then
-        info "Using config template from source..."
- cp "${TMP_DIR}/.env.example" "${TMP_DIR}/config" + if [ "$OS" = "darwin" ]; then + # macOS config + if [ -f "${TMP_DIR}/.env.darwin.example" ]; then + info "Using macOS config template from source..." + cp "${TMP_DIR}/.env.darwin.example" "${TMP_DIR}/config" + else + info "Downloading macOS config template..." + CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.darwin.example" + if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then + error "Failed to download config template from ${CONFIG_URL}" + fi + fi + + # Expand ~ to $HOME (launchd doesn't do shell expansion) + sed -i '' "s|~/|${HOME}/|g" "${TMP_DIR}/config" + + # Generate random JWT secret + info "Generating JWT secret..." + JWT_SECRET=$(openssl rand -hex 32) + sed -i '' "s/^JWT_SECRET=.*/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config" + + # Auto-detect Docker socket + DOCKER_SOCKET="" + if [ -n "$DOCKER_HOST" ]; then + DOCKER_SOCKET="${DOCKER_HOST#unix://}" + elif [ -S /var/run/docker.sock ]; then + DOCKER_SOCKET="/var/run/docker.sock" + elif [ -S "$HOME/.colima/default/docker.sock" ]; then + DOCKER_SOCKET="$HOME/.colima/default/docker.sock" + fi + if [ -n "$DOCKER_SOCKET" ]; then + info "Detected Docker socket: ${DOCKER_SOCKET}" + if grep -q '^DOCKER_SOCKET=' "${TMP_DIR}/config"; then + sed -i '' "s|^DOCKER_SOCKET=.*|DOCKER_SOCKET=${DOCKER_SOCKET}|" "${TMP_DIR}/config" + elif grep -q '^# DOCKER_SOCKET=' "${TMP_DIR}/config"; then + sed -i '' "s|^# DOCKER_SOCKET=.*|DOCKER_SOCKET=${DOCKER_SOCKET}|" "${TMP_DIR}/config" + else + echo "DOCKER_SOCKET=${DOCKER_SOCKET}" >> "${TMP_DIR}/config" + fi + fi + + info "Installing config file at ${CONFIG_FILE}..." + install -m 600 "${TMP_DIR}/config" "$CONFIG_FILE" else - info "Downloading config template..." - CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.example" - if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then - error "Failed to download config template from ${CONFIG_URL}" + # Linux config + if [ -f "${TMP_DIR}/.env.example" ]; then + info "Using config template from source..." + cp "${TMP_DIR}/.env.example" "${TMP_DIR}/config" + else + info "Downloading config template..." + CONFIG_URL="https://raw.githubusercontent.com/${REPO}/${VERSION}/.env.example" + if ! curl -fsSL "$CONFIG_URL" -o "${TMP_DIR}/config"; then + error "Failed to download config template from ${CONFIG_URL}" + fi fi + + # Generate random JWT secret + info "Generating JWT secret..." + JWT_SECRET=$(openssl rand -hex 32) + sed -i "s/^JWT_SECRET=$/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config" + + # Set fixed ports for production (instead of random ports used in dev) + sed -i "s/^# CADDY_ADMIN_PORT=.*/CADDY_ADMIN_PORT=2019/" "${TMP_DIR}/config" + sed -i "s/^# INTERNAL_DNS_PORT=.*/INTERNAL_DNS_PORT=5353/" "${TMP_DIR}/config" + + info "Installing config file at ${CONFIG_FILE}..." + $SUDO install -m 640 "${TMP_DIR}/config" "$CONFIG_FILE" + $SUDO chown root:root "$CONFIG_FILE" fi - - # Generate random JWT secret - info "Generating JWT secret..." - JWT_SECRET=$(openssl rand -hex 32) - sed -i "s/^JWT_SECRET=$/JWT_SECRET=${JWT_SECRET}/" "${TMP_DIR}/config" - - # Set fixed ports for production (instead of random ports used in dev) - # Replace entire line to avoid trailing comments being included in the value - sed -i "s/^# CADDY_ADMIN_PORT=.*/CADDY_ADMIN_PORT=2019/" "${TMP_DIR}/config" - sed -i "s/^# INTERNAL_DNS_PORT=.*/INTERNAL_DNS_PORT=5353/" "${TMP_DIR}/config" - - info "Installing config file at ${CONFIG_FILE}..." 

 # =============================================================================
-# Install systemd service
+# Install service
 # =============================================================================

-info "Installing systemd service..."
-$SUDO tee "${SYSTEMD_DIR}/${SERVICE_NAME}.service" > /dev/null << EOF
+if [ "$OS" = "darwin" ]; then
+    # macOS: launchd plist
+    PLIST_DIR="$HOME/Library/LaunchAgents"
+    PLIST_PATH="${PLIST_DIR}/com.kernel.hypeman.plist"
+    mkdir -p "$PLIST_DIR"
+
+    info "Installing launchd service..."
+
+    # Build environment variables from config file
+    ENV_DICT=""
+    if [ -f "$CONFIG_FILE" ]; then
+        while IFS= read -r line; do
+            # Skip comments and empty lines
+            [[ "$line" =~ ^[[:space:]]*# ]] && continue
+            [[ -z "$line" ]] && continue
+            key="${line%%=*}"
+            value="${line#*=}"
+            ENV_DICT="${ENV_DICT}
+        <key>${key}</key>
+        <string>${value}</string>"
+        done < "$CONFIG_FILE"
+    fi
+
+    cat > "$PLIST_PATH" << PLIST
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.kernel.hypeman</string>
+    <key>ProgramArguments</key>
+    <array>
+        <string>${INSTALL_DIR}/${BINARY_NAME}</string>
+    </array>
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/opt/homebrew/opt/e2fsprogs/sbin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>${ENV_DICT}
+    </dict>
+    <key>KeepAlive</key>
+    <true/>
+    <key>RunAtLoad</key>
+    <true/>
+    <key>StandardOutPath</key>
+    <string>${DATA_DIR}/logs/hypeman.log</string>
+    <key>StandardErrorPath</key>
+    <string>${DATA_DIR}/logs/hypeman.log</string>
+</dict>
+</plist>
+PLIST
+
+    info "Loading ${SERVICE_NAME} service..."
+    launchctl load "$PLIST_PATH"
+else
+    # Linux: systemd
+    info "Installing systemd service..."
+    $SUDO tee "${SYSTEMD_DIR}/${SERVICE_NAME}.service" > /dev/null << EOF
 [Unit]
 Description=Hypeman API Server
 Documentation=https://github.com/kernel/hypeman
@@ -396,17 +587,42 @@ ReadWritePaths=${DATA_DIR}

 WantedBy=multi-user.target
 EOF

-# Reload systemd
-info "Reloading systemd..."
-$SUDO systemctl daemon-reload
+    info "Reloading systemd..."
+    $SUDO systemctl daemon-reload

-# Enable service
-info "Enabling ${SERVICE_NAME} service..."
-$SUDO systemctl enable "$SERVICE_NAME"
+    info "Enabling ${SERVICE_NAME} service..."
+    $SUDO systemctl enable "$SERVICE_NAME"

-# Start service
-info "Starting ${SERVICE_NAME} service..."
-$SUDO systemctl start "$SERVICE_NAME"
+    info "Starting ${SERVICE_NAME} service..."
+    $SUDO systemctl start "$SERVICE_NAME"
+fi
+
+# =============================================================================
+# Build builder image (macOS)
+# =============================================================================
+
+if [ "$OS" = "darwin" ]; then
+    info "Attempting to build builder image..."
+    if command -v docker >/dev/null 2>&1; then
+        if [ -n "$BRANCH" ] && [ -d "${TMP_DIR}/hypeman" ]; then
+            BUILD_CONTEXT="${TMP_DIR}/hypeman"
+        else
+            BUILD_CONTEXT=""
+        fi
+
+        if [ -n "$BUILD_CONTEXT" ] && [ -f "${BUILD_CONTEXT}/lib/builds/images/generic/Dockerfile" ]; then
+            if ! docker build -t hypeman/builder:latest -f "${BUILD_CONTEXT}/lib/builds/images/generic/Dockerfile" "$BUILD_CONTEXT" 2>/dev/null; then
+                warn "Failed to build builder image. You can build it later manually."
+            else
+                info "Builder image built successfully"
+            fi
+        else
+            warn "Builder image Dockerfile not available. Build it manually: docker build -t hypeman/builder:latest -f lib/builds/images/generic/Dockerfile ."
+        fi
+    else
+        warn "Docker not available, skipping builder image build"
+    fi
+fi
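+
+# Illustrative follow-up (not executed here): verify the builder image exists:
+#   docker image inspect hypeman/builder:latest >/dev/null 2>&1 && echo "builder image present"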

 # =============================================================================
 # Install Hypeman CLI
@@ -414,34 +630,51 @@ $SUDO systemctl start "$SERVICE_NAME"

 CLI_REPO="kernel/hypeman-cli"

+# CLI releases use goreleaser naming: "macos" not "darwin", .zip not .tar.gz on macOS
+if [ "$OS" = "darwin" ]; then
+    CLI_OS="macos"
+    CLI_EXT="zip"
+else
+    CLI_OS="$OS"
+    CLI_EXT="tar.gz"
+fi
+
 if [ -z "$CLI_VERSION" ] || [ "$CLI_VERSION" == "latest" ]; then
     info "Fetching latest CLI version with available artifacts..."
-    CLI_VERSION=$(find_release_with_artifact "$CLI_REPO" "hypeman" "$OS" "$ARCH")
+    CLI_VERSION=$(find_release_with_artifact "$CLI_REPO" "hypeman" "$CLI_OS" "$ARCH" "$CLI_EXT" || true)
     if [ -z "$CLI_VERSION" ]; then
-        warn "Failed to find a CLI release with artifacts for ${OS}/${ARCH}, skipping CLI installation"
+        warn "Failed to find a CLI release with artifacts for ${CLI_OS}/${ARCH}, skipping CLI installation"
     fi
 fi

 if [ -n "$CLI_VERSION" ]; then
     info "Installing Hypeman CLI version: $CLI_VERSION"
-    
+
     CLI_VERSION_NUM="${CLI_VERSION#v}"
-    CLI_ARCHIVE_NAME="hypeman_${CLI_VERSION_NUM}_${OS}_${ARCH}.tar.gz"
+    CLI_ARCHIVE_NAME="hypeman_${CLI_VERSION_NUM}_${CLI_OS}_${ARCH}.${CLI_EXT}"
     CLI_DOWNLOAD_URL="https://github.com/${CLI_REPO}/releases/download/${CLI_VERSION}/${CLI_ARCHIVE_NAME}"
-    
+
     info "Downloading CLI ${CLI_ARCHIVE_NAME}..."
     if curl -fsSL "$CLI_DOWNLOAD_URL" -o "${TMP_DIR}/${CLI_ARCHIVE_NAME}"; then
         info "Extracting CLI..."
         mkdir -p "${TMP_DIR}/cli"
-        tar -xzf "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -C "${TMP_DIR}/cli"
-
-        # Install CLI binary
-        info "Installing hypeman CLI to ${INSTALL_DIR}..."
-        $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman-cli"
-
-        # Install wrapper script to /usr/local/bin for PATH access
-        info "Installing hypeman wrapper to /usr/local/bin..."
-        $SUDO tee /usr/local/bin/hypeman > /dev/null << WRAPPER
+        if [ "$CLI_EXT" = "zip" ]; then
+            unzip -qo "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -d "${TMP_DIR}/cli"
+        else
+            tar -xzf "${TMP_DIR}/${CLI_ARCHIVE_NAME}" -C "${TMP_DIR}/cli"
+        fi
+
+        if [ "$OS" = "darwin" ]; then
+            info "Installing hypeman CLI to ${INSTALL_DIR}..."
+            $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman"
+        else
+            # Install CLI binary
+            info "Installing hypeman CLI to ${INSTALL_DIR}..."
+            $SUDO install -m 755 "${TMP_DIR}/cli/hypeman" "${INSTALL_DIR}/hypeman-cli"
+
+            # Install wrapper script to /usr/local/bin for PATH access
+            info "Installing hypeman wrapper to /usr/local/bin..."
+            $SUDO tee /usr/local/bin/hypeman > /dev/null << WRAPPER
 #!/bin/bash
 # Wrapper script for hypeman CLI that auto-generates API token
 set -a
@@ -450,7 +683,8 @@ set +a
 export HYPEMAN_API_KEY=\$(${INSTALL_DIR}/hypeman-token -user-id "cli-user-\$(whoami)" 2>/dev/null)
 exec ${INSTALL_DIR}/hypeman-cli "\$@"
 WRAPPER
-        $SUDO chmod 755 /usr/local/bin/hypeman
+            $SUDO chmod 755 /usr/local/bin/hypeman
+        fi
     else
         warn "Failed to download CLI from ${CLI_DOWNLOAD_URL}, skipping CLI installation"
     fi
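+
+    # Illustrative smoke test (not run by the installer): once the service is
+    # up, exercise the CLI and token tool, e.g.:
+    #   hypeman --help
+    #   ${INSTALL_DIR}/hypeman-token -user-id "cli-user-$(whoami)"   # prints a JWT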
echo "" -echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" -echo " CLI: /usr/local/bin/hypeman" -echo " Token tool: /usr/local/bin/hypeman-token" -echo " Config: ${CONFIG_FILE}" -echo " Data: ${DATA_DIR}" -echo " Service: ${SERVICE_NAME}.service" + +if [ "$OS" = "darwin" ]; then + echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" + echo " CLI: ${INSTALL_DIR}/hypeman" + echo " Token tool: ${INSTALL_DIR}/hypeman-token" + echo " Config: ${CONFIG_FILE}" + echo " Data: ${DATA_DIR}" + echo " Service: ~/Library/LaunchAgents/com.kernel.hypeman.plist" + echo " Logs: ${DATA_DIR}/logs/hypeman.log" +else + echo " API Binary: ${INSTALL_DIR}/${BINARY_NAME}" + echo " CLI: /usr/local/bin/hypeman" + echo " Token tool: /usr/local/bin/hypeman-token" + echo " Config: ${CONFIG_FILE}" + echo " Data: ${DATA_DIR}" + echo " Service: ${SERVICE_NAME}.service" +fi + echo "" echo "" echo "Next steps:" diff --git a/scripts/uninstall.sh b/scripts/uninstall.sh index ac45bb42..ccf9d673 100755 --- a/scripts/uninstall.sh +++ b/scripts/uninstall.sh @@ -6,16 +6,12 @@ # curl -fsSL https://raw.githubusercontent.com/kernel/hypeman/main/scripts/uninstall.sh | bash # # Options (via environment variables): -# KEEP_DATA=false - Remove data directory (/var/lib/hypeman) - kept by default -# KEEP_CONFIG=true - Keep config directory (/etc/hypeman) +# KEEP_DATA=false - Remove data directory - kept by default +# KEEP_CONFIG=true - Keep config directory # set -e -INSTALL_DIR="/opt/hypeman" -DATA_DIR="/var/lib/hypeman" -CONFIG_DIR="/etc/hypeman" -SYSTEMD_DIR="/etc/systemd/system" SERVICE_NAME="hypeman" SERVICE_USER="hypeman" @@ -30,50 +26,100 @@ info() { echo -e "${GREEN}[INFO]${NC} $1"; } warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; } +# ============================================================================= +# Detect OS +# ============================================================================= + +OS=$(uname -s | tr '[:upper:]' '[:lower:]') +if [ "$OS" != "linux" ] && [ "$OS" != "darwin" ]; then + error "Unsupported OS: $OS (supported: linux, darwin)" +fi + +# ============================================================================= +# OS-conditional defaults +# ============================================================================= + +if [ "$OS" = "darwin" ]; then + INSTALL_DIR="/usr/local/bin" + DATA_DIR="$HOME/Library/Application Support/hypeman" + CONFIG_DIR="$HOME/.config/hypeman" +else + INSTALL_DIR="/opt/hypeman" + DATA_DIR="/var/lib/hypeman" + CONFIG_DIR="/etc/hypeman" +fi + +SYSTEMD_DIR="/etc/systemd/system" + # ============================================================================= # Pre-flight checks # ============================================================================= info "Running pre-flight checks..." -# Check for root or sudo access SUDO="" -if [ "$EUID" -ne 0 ]; then - if ! command -v sudo >/dev/null 2>&1; then - error "This script requires root privileges. Please run as root or install sudo." +if [ "$OS" = "linux" ]; then + if [ "$EUID" -ne 0 ]; then + if ! command -v sudo >/dev/null 2>&1; then + error "This script requires root privileges. Please run as root or install sudo." + fi + if ! sudo -n true 2>/dev/null; then + info "Requesting sudo privileges..." + if ! sudo -v < /dev/tty; then + error "Failed to obtain sudo privileges" + fi + fi + SUDO="sudo" fi - # Try passwordless sudo first, then prompt from terminal if needed - if ! sudo -n true 2>/dev/null; then - info "Requesting sudo privileges..." - if ! 
 # =============================================================================
 # Pre-flight checks
 # =============================================================================

 info "Running pre-flight checks..."

-# Check for root or sudo access
 SUDO=""
-if [ "$EUID" -ne 0 ]; then
-    if ! command -v sudo >/dev/null 2>&1; then
-        error "This script requires root privileges. Please run as root or install sudo."
+if [ "$OS" = "linux" ]; then
+    if [ "$EUID" -ne 0 ]; then
+        if ! command -v sudo >/dev/null 2>&1; then
+            error "This script requires root privileges. Please run as root or install sudo."
+        fi
+        if ! sudo -n true 2>/dev/null; then
+            info "Requesting sudo privileges..."
+            if ! sudo -v < /dev/tty; then
+                error "Failed to obtain sudo privileges"
+            fi
+        fi
+        SUDO="sudo"
     fi
-    # Try passwordless sudo first, then prompt from terminal if needed
-    if ! sudo -n true 2>/dev/null; then
-        info "Requesting sudo privileges..."
-        if ! sudo -v < /dev/tty; then
-            error "Failed to obtain sudo privileges"
+elif [ "$OS" = "darwin" ]; then
+    if [ ! -w "$INSTALL_DIR" ]; then
+        if command -v sudo >/dev/null 2>&1; then
+            if ! sudo -n true 2>/dev/null; then
+                info "Requesting sudo privileges (needed for $INSTALL_DIR)..."
+                sudo -v < /dev/tty 2>/dev/null || true
+            fi
+            SUDO="sudo"
         fi
     fi
-    SUDO="sudo"
 fi

 # =============================================================================
 # Stop and disable service
 # =============================================================================

-if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
-    info "Stopping ${SERVICE_NAME} service..."
-    $SUDO systemctl stop "$SERVICE_NAME"
-fi
+if [ "$OS" = "darwin" ]; then
+    PLIST_PATH="$HOME/Library/LaunchAgents/com.kernel.hypeman.plist"
+    if [ -f "$PLIST_PATH" ]; then
+        info "Stopping ${SERVICE_NAME} service..."
+        launchctl unload "$PLIST_PATH" 2>/dev/null || true
+    fi
+else
+    if $SUDO systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
+        info "Stopping ${SERVICE_NAME} service..."
+        $SUDO systemctl stop "$SERVICE_NAME"
+    fi

-if $SUDO systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
-    info "Disabling ${SERVICE_NAME} service..."
-    $SUDO systemctl disable "$SERVICE_NAME"
+    if $SUDO systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
+        info "Disabling ${SERVICE_NAME} service..."
+        $SUDO systemctl disable "$SERVICE_NAME"
+    fi
 fi

 # =============================================================================
-# Remove systemd service
+# Remove service files
 # =============================================================================

-if [ -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" ]; then
-    info "Removing systemd service..."
-    $SUDO rm -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service"
-    $SUDO systemctl daemon-reload
+if [ "$OS" = "darwin" ]; then
+    if [ -f "$PLIST_PATH" ]; then
+        info "Removing launchd plist..."
+        rm -f "$PLIST_PATH"
+    fi
+else
+    if [ -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service" ]; then
+        info "Removing systemd service..."
+        $SUDO rm -f "${SYSTEMD_DIR}/${SERVICE_NAME}.service"
+        $SUDO systemctl daemon-reload
+    fi
 fi
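+
+# Illustrative verification (not executed by this script): on macOS the launch
+# agent should no longer be loaded afterwards:
+#   launchctl list | grep com.kernel.hypeman || echo "hypeman agent unloaded"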

 # =============================================================================
@@ -82,13 +128,19 @@ fi

 info "Removing binaries..."

-# Remove wrapper scripts from /usr/local/bin
-$SUDO rm -f /usr/local/bin/hypeman
-$SUDO rm -f /usr/local/bin/hypeman-token
-
-# Remove install directory
-if [ -d "$INSTALL_DIR" ]; then
-    $SUDO rm -rf "$INSTALL_DIR"
+if [ "$OS" = "darwin" ]; then
+    $SUDO rm -f "${INSTALL_DIR}/hypeman-api"
+    $SUDO rm -f "${INSTALL_DIR}/hypeman-token"
+    $SUDO rm -f "${INSTALL_DIR}/hypeman"
+else
+    # Remove wrapper scripts from /usr/local/bin
+    $SUDO rm -f /usr/local/bin/hypeman
+    $SUDO rm -f /usr/local/bin/hypeman-token
+
+    # Remove install directory
+    if [ -d "$INSTALL_DIR" ]; then
+        $SUDO rm -rf "$INSTALL_DIR"
+    fi
 fi

 # =============================================================================
@@ -100,7 +152,11 @@ if [ -d "$DATA_DIR" ]; then
         info "Keeping data directory: ${DATA_DIR}"
     else
         info "Removing data directory: ${DATA_DIR}"
-        $SUDO rm -rf "$DATA_DIR"
+        if [ "$OS" = "darwin" ]; then
+            rm -rf "$DATA_DIR"
+        else
+            $SUDO rm -rf "$DATA_DIR"
+        fi
     fi
 fi

@@ -113,20 +169,26 @@ if [ -d "$CONFIG_DIR" ]; then
         warn "Keeping config directory: ${CONFIG_DIR}"
     else
         info "Removing config directory: ${CONFIG_DIR}"
-        $SUDO rm -rf "$CONFIG_DIR"
+        if [ "$OS" = "darwin" ]; then
+            rm -rf "$CONFIG_DIR"
+        else
+            $SUDO rm -rf "$CONFIG_DIR"
+        fi
     fi
 fi

 # =============================================================================
-# Remove hypeman user
+# Remove hypeman user (Linux only)
 # =============================================================================

-if id "$SERVICE_USER" &>/dev/null; then
-    if [ "${KEEP_DATA:-true}" = "true" ]; then
-        info "Keeping system user: ${SERVICE_USER} (data is preserved)"
-    else
-        info "Removing system user: ${SERVICE_USER}"
-        $SUDO userdel "$SERVICE_USER" 2>/dev/null || true
+if [ "$OS" = "linux" ]; then
+    if id "$SERVICE_USER" &>/dev/null; then
+        if [ "${KEEP_DATA:-true}" = "true" ]; then
+            info "Keeping system user: ${SERVICE_USER} (data is preserved)"
+        else
+            info "Removing system user: ${SERVICE_USER}"
+            $SUDO userdel "$SERVICE_USER" 2>/dev/null || true
+        fi
     fi
 fi

@@ -150,19 +212,32 @@ echo ""

 if [ "${KEEP_DATA:-true}" = "true" ] && [ -d "$DATA_DIR" ]; then
     info "Data directory preserved: ${DATA_DIR}"
-    echo "  To remove: sudo rm -rf ${DATA_DIR}"
+    if [ "$OS" = "darwin" ]; then
+        echo "  To remove: rm -rf \"${DATA_DIR}\""
+    else
+        echo "  To remove: sudo rm -rf ${DATA_DIR}"
+    fi
     echo ""
 fi

 if [ "${KEEP_CONFIG:-false}" = "true" ] && [ -d "$CONFIG_DIR" ]; then
     info "Config directory preserved: ${CONFIG_DIR}"
-    echo "  To remove: sudo rm -rf ${CONFIG_DIR}"
+    if [ "$OS" = "darwin" ]; then
+        echo "  To remove: rm -rf \"${CONFIG_DIR}\""
+    else
+        echo "  To remove: sudo rm -rf ${CONFIG_DIR}"
+    fi
     echo ""
 fi

-warn "Note: Caddy or Cloud Hypervisor processes may still be running."
-echo "  Check with: ps aux | grep -E 'caddy|cloud-h'"
-echo "  Kill all:   sudo pkill -f caddy; sudo pkill -f cloud-h"
+if [ "$OS" = "darwin" ]; then
+    warn "Note: vz-shim processes managed by hypeman may still be running."
+    echo "  Check with: ps aux | grep vz-shim"
+else
+    warn "Note: Caddy or Cloud Hypervisor processes may still be running."
+    echo "  Check with: ps aux | grep -E 'caddy|cloud-h'"
+    echo "  Kill all:   sudo pkill -f caddy; sudo pkill -f cloud-h"
+fi

 echo ""
 echo "To reinstall:"
diff --git a/vz.entitlements b/vz.entitlements
new file mode 100644
index 00000000..41432913
--- /dev/null
+++ b/vz.entitlements
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>com.apple.security.virtualization</key>
+    <true/>
+
+    <key>com.apple.security.network.server</key>
+    <true/>
+    <key>com.apple.security.network.client</key>
+    <true/>
+</dict>
+</plist>