diff --git a/.github/workflows/build-binaries.yaml b/.github/workflows/build-binaries.yaml new file mode 100644 index 00000000..acefc9a6 --- /dev/null +++ b/.github/workflows/build-binaries.yaml @@ -0,0 +1,170 @@ +# Copyright 2023 LiveKit, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Build binaries + +on: + workflow_call: + inputs: + output_dir: + type: string + default: bin + upload_release: + type: boolean + default: false + +jobs: + build: + strategy: + fail-fast: false + matrix: + include: + - os: macos-latest + suffix: darwin_arm64 + - os: ubuntu-latest + suffix: linux_amd64 + zig_target: x86_64-linux-gnu.2.28 + alsa_arch: amd64 + alsa_triple: x86_64-linux-gnu + - os: ubuntu-latest + suffix: linux_arm64 + zig_target: aarch64-linux-gnu.2.28 + alsa_arch: arm64 + alsa_triple: aarch64-linux-gnu + goarch: arm64 + - os: ubuntu-latest + suffix: linux_arm + zig_target: arm-linux-gnueabihf.2.28 + alsa_arch: armhf + alsa_triple: arm-linux-gnueabihf + goarch: arm + goarm: "7" + - os: ubuntu-latest + suffix: windows_amd64 + zig_target: x86_64-windows-gnu + goos: windows + goarch: amd64 + - os: ubuntu-latest + suffix: windows_arm64 + zig_target: aarch64-windows-gnu + goos: windows + goarch: arm64 + - os: ubuntu-latest + suffix: windows_arm + goos: windows + goarch: arm + goarm: "7" + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v6 + with: + lfs: 'true' + submodules: true + + - run: git lfs pull + + - name: Set up Go + uses: actions/setup-go@v6 
+ with: + go-version: "1.25" + + - name: Install Zig + if: matrix.zig_target + uses: mlugg/setup-zig@v2 + with: + version: 0.14.1 + + - name: Install ALSA headers + if: matrix.alsa_arch + run: | + sudo dpkg --add-architecture ${{ matrix.alsa_arch }} + if [ "${{ matrix.alsa_arch }}" != "amd64" ]; then + CODENAME=$(lsb_release -cs) + # Restrict existing sources to amd64 to avoid 404s for foreign arch + for f in /etc/apt/sources.list.d/*.sources; do + grep -q '^Architectures:' "$f" || sudo sed -i '/^Types:/a Architectures: amd64 i386' "$f" + done + # Add ports.ubuntu.com for the foreign architecture + printf 'Types: deb\nURIs: http://ports.ubuntu.com/ubuntu-ports\nSuites: %s %s-updates\nComponents: main universe\nArchitectures: %s\nSigned-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg\n' \ + "$CODENAME" "$CODENAME" "${{ matrix.alsa_arch }}" | sudo tee /etc/apt/sources.list.d/ports.sources + fi + sudo apt-get update + sudo apt-get install -y libasound2-dev:${{ matrix.alsa_arch }} + + - name: Generate Windows import libraries + if: matrix.goos == 'windows' && matrix.zig_target + run: | + ZIG_LIB=$(zig env | jq -r '.lib_dir') + echo "ZIG_LIB=${ZIG_LIB}" >> "$GITHUB_ENV" + LIB_DIR="${ZIG_LIB}/libc/mingw/lib-common" + # Zig bundles MinGW .def files but lld needs .a import libraries. + # Go's compiled objects embed COFF /DEFAULTLIB directives (e.g. dbghelp, + # bcrypt) that lld resolves directly, bypassing Zig's lazy .def→.a + # generation. Pre-generate all import libraries so lld can find them. 
+ MACHINE=${{ matrix.goarch == 'amd64' && 'i386:x86-64' || 'arm64' }} + for def in "${LIB_DIR}"/*.def; do + lib=$(basename "$def" .def) + [ -f "${LIB_DIR}/lib${lib}.a" ] && continue + zig dlltool -d "$def" -l "${LIB_DIR}/lib${lib}.a" -m "$MACHINE" 2>/dev/null || true + done + + - name: Build + env: + CGO_ENABLED: ${{ (matrix.goos && !matrix.zig_target) && '0' || '1' }} + CC: ${{ matrix.zig_target && format('zig cc -target {0}', matrix.zig_target) || '' }} + CXX: ${{ matrix.zig_target && format('zig c++ -target {0}', matrix.zig_target) || '' }} + # Zig uses its own sysroot; point it at the system ALSA headers and libraries + CGO_CFLAGS: ${{ matrix.alsa_triple && format('-isystem /usr/include -isystem /usr/include/{0}', matrix.alsa_triple) || '' }} + CGO_LDFLAGS: ${{ matrix.alsa_triple && format('-L/usr/lib/{0}', matrix.alsa_triple) || '' }} + # -fms-extensions: enable __try/__except (SEH) used by WebRTC + # -DNTDDI_VERSION: target Windows 10 base to skip WinRT includes absent from MinGW + CGO_CXXFLAGS: ${{ matrix.goos == 'windows' && '-fms-extensions -DNTDDI_VERSION=0x0A000000' || '' }} + GOOS: ${{ matrix.goos || '' }} + GOARCH: ${{ matrix.goarch || '' }} + GOARM: ${{ matrix.goarm || '' }} + shell: bash + run: | + EXT=""; if [ "${GOOS:-}" = "windows" ]; then EXT=".exe"; fi + TAGS="" + if [ "$CGO_ENABLED" = "1" ]; then TAGS="-tags console"; fi + # Force external linking for Windows so Go uses zig cc (CC) as the linker, + # and add Zig's MinGW lib path so lld can find the generated import libraries. 
+ EXTLD="" + if [ "${GOOS:-}" = "windows" ] && [ "$CGO_ENABLED" = "1" ]; then + EXTLD="-linkmode=external -extldflags '-L${ZIG_LIB}/libc/mingw/lib-common'" + fi + go build $TAGS -ldflags "-w -s $EXTLD" -o "${{ inputs.output_dir }}/lk${EXT}" ./cmd/lk + + - name: Verify binary + if: "!matrix.goos && !matrix.goarch" + run: ${{ inputs.output_dir }}/lk --help > /dev/null + + - name: Package and upload + if: inputs.upload_release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + run: | + TAG="${GITHUB_REF#refs/tags/}" + VERSION="${TAG#v}" + NAME="lk_${VERSION}_${{ matrix.suffix }}" + cp LICENSE ${{ inputs.output_dir }}/ + cp -r autocomplete ${{ inputs.output_dir }}/ + if [[ "${{ matrix.suffix }}" == windows_* ]]; then + cd ${{ inputs.output_dir }} && zip -r "../${NAME}.zip" lk.exe LICENSE autocomplete && cd .. + gh release upload "$TAG" "${NAME}.zip" --clobber + else + tar -czf "${NAME}.tar.gz" -C ${{ inputs.output_dir }} lk LICENSE autocomplete + gh release upload "$TAG" "${NAME}.tar.gz" --clobber + fi diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index b87a6552..99b8baae 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -20,32 +20,149 @@ on: pull_request: branches: [ main ] +permissions: + contents: read + jobs: - build: + lint-and-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - uses: actions/cache@v5 with: - path: | - ~/go/pkg/mod - ~/go/bin - ~/.cache - key: livekit-cli + submodules: true - name: Set up Go uses: actions/setup-go@v6 with: go-version: "1.25" - - name: Download Go modules - run: go mod download - - name: Static Check uses: dominikh/staticcheck-action@v1.4.0 with: version: "latest" install-go: false - - name: Run Go tests + - name: Test run: go test -v ./... 
+ + build: + strategy: + fail-fast: false + matrix: + include: + - os: macos-latest + suffix: darwin_arm64 + - os: ubuntu-latest + suffix: linux_amd64 + zig_target: x86_64-linux-gnu.2.28 + alsa_arch: amd64 + alsa_triple: x86_64-linux-gnu + - os: ubuntu-latest + suffix: linux_arm64 + zig_target: aarch64-linux-gnu.2.28 + alsa_arch: arm64 + alsa_triple: aarch64-linux-gnu + goarch: arm64 + - os: ubuntu-latest + suffix: linux_arm + zig_target: arm-linux-gnueabihf.2.28 + alsa_arch: armhf + alsa_triple: arm-linux-gnueabihf + goarch: arm + goarm: "7" + - os: ubuntu-latest + suffix: windows_amd64 + zig_target: x86_64-windows-gnu + goos: windows + goarch: amd64 + - os: ubuntu-latest + suffix: windows_arm64 + zig_target: aarch64-windows-gnu + goos: windows + goarch: arm64 + - os: ubuntu-latest + suffix: windows_arm + goos: windows + goarch: arm + goarm: "7" + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version: "1.25" + + - name: Install Zig + if: matrix.zig_target + uses: mlugg/setup-zig@v2 + with: + version: 0.14.1 + + - name: Install ALSA headers + if: matrix.alsa_arch + run: | + sudo dpkg --add-architecture ${{ matrix.alsa_arch }} + if [ "${{ matrix.alsa_arch }}" != "amd64" ]; then + CODENAME=$(lsb_release -cs) + # Restrict existing sources to amd64 to avoid 404s for foreign arch + for f in /etc/apt/sources.list.d/*.sources; do + grep -q '^Architectures:' "$f" || sudo sed -i '/^Types:/a Architectures: amd64 i386' "$f" + done + # Add ports.ubuntu.com for the foreign architecture + printf 'Types: deb\nURIs: http://ports.ubuntu.com/ubuntu-ports\nSuites: %s %s-updates\nComponents: main universe\nArchitectures: %s\nSigned-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg\n' \ + "$CODENAME" "$CODENAME" "${{ matrix.alsa_arch }}" | sudo tee /etc/apt/sources.list.d/ports.sources + fi + sudo apt-get update + sudo apt-get install -y libasound2-dev:${{ matrix.alsa_arch }} + 
+ - name: Generate Windows import libraries + if: matrix.goos == 'windows' && matrix.zig_target + run: | + ZIG_LIB=$(zig env | jq -r '.lib_dir') + echo "ZIG_LIB=${ZIG_LIB}" >> "$GITHUB_ENV" + LIB_DIR="${ZIG_LIB}/libc/mingw/lib-common" + # Zig bundles MinGW .def files but lld needs .a import libraries. + # Go's compiled objects embed COFF /DEFAULTLIB directives (e.g. dbghelp, + # bcrypt) that lld resolves directly, bypassing Zig's lazy .def→.a + # generation. Pre-generate all import libraries so lld can find them. + MACHINE=${{ matrix.goarch == 'amd64' && 'i386:x86-64' || 'arm64' }} + for def in "${LIB_DIR}"/*.def; do + lib=$(basename "$def" .def) + [ -f "${LIB_DIR}/lib${lib}.a" ] && continue + zig dlltool -d "$def" -l "${LIB_DIR}/lib${lib}.a" -m "$MACHINE" 2>/dev/null || true + done + + - name: Build + env: + CGO_ENABLED: ${{ (matrix.goos && !matrix.zig_target) && '0' || '1' }} + CC: ${{ matrix.zig_target && format('zig cc -target {0}', matrix.zig_target) || '' }} + CXX: ${{ matrix.zig_target && format('zig c++ -target {0}', matrix.zig_target) || '' }} + # Zig uses its own sysroot; point it at the system ALSA headers and libraries + CGO_CFLAGS: ${{ matrix.alsa_triple && format('-isystem /usr/include -isystem /usr/include/{0}', matrix.alsa_triple) || '' }} + CGO_LDFLAGS: ${{ matrix.alsa_triple && format('-L/usr/lib/{0}', matrix.alsa_triple) || '' }} + # -fms-extensions: enable __try/__except (SEH) used by WebRTC + # -DNTDDI_VERSION: target Windows 10 base to skip WinRT includes absent from MinGW + CGO_CXXFLAGS: ${{ matrix.goos == 'windows' && '-fms-extensions -DNTDDI_VERSION=0x0A000000' || '' }} + GOOS: ${{ matrix.goos || '' }} + GOARCH: ${{ matrix.goarch || '' }} + GOARM: ${{ matrix.goarm || '' }} + shell: bash + run: | + EXT=""; if [ "${GOOS:-}" = "windows" ]; then EXT=".exe"; fi + TAGS="" + if [ "$CGO_ENABLED" = "1" ]; then TAGS="-tags console"; fi + # Force external linking for Windows so Go uses zig cc (CC) as the linker, + # and add Zig's MinGW lib path so lld 
can find the generated import libraries. + EXTLD="" + if [ "${GOOS:-}" = "windows" ] && [ "$CGO_ENABLED" = "1" ]; then + EXTLD="-linkmode=external -extldflags '-L${ZIG_LIB}/libc/mingw/lib-common'" + fi + go build $TAGS -ldflags "-w -s $EXTLD" -o "bin/lk${EXT}" ./cmd/lk + + - name: Verify binary + if: "!matrix.goos && !matrix.goarch" + run: bin/lk --help > /dev/null diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 58485dc4..db3106cd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -20,6 +20,8 @@ jobs: steps: - uses: actions/checkout@v6 + with: + submodules: true - name: Set up Go uses: actions/setup-go@v6 diff --git a/Formula/lk.rb b/Formula/lk.rb new file mode 100644 index 00000000..841854fe --- /dev/null +++ b/Formula/lk.rb @@ -0,0 +1,47 @@ +# typed: false +# frozen_string_literal: true + +# This formula is meant for a custom Homebrew tap (e.g. livekit/homebrew-livekit). +# It installs a prebuilt binary with console support (PortAudio + WebRTC AEC). +# Usage: brew install livekit/livekit/lk +class Lk < Formula + desc "Command-line interface to LiveKit (with console support)" + homepage "https://livekit.io" + license "Apache-2.0" + version "VERSION" + + on_macos do + if Hardware::CPU.arm? + url "https://github.com/livekit/livekit-cli/releases/download/vVERSION/lk_VERSION_darwin_arm64.tar.gz" + sha256 "SHA256_DARWIN_ARM64" + end + end + + on_linux do + if Hardware::CPU.arm? && Hardware::CPU.is_64_bit? + url "https://github.com/livekit/livekit-cli/releases/download/vVERSION/lk_VERSION_linux_arm64.tar.gz" + sha256 "SHA256_LINUX_ARM64" + elsif Hardware::CPU.arm? 
+ url "https://github.com/livekit/livekit-cli/releases/download/vVERSION/lk_VERSION_linux_arm.tar.gz" + sha256 "SHA256_LINUX_ARM" + else + url "https://github.com/livekit/livekit-cli/releases/download/vVERSION/lk_VERSION_linux_amd64.tar.gz" + sha256 "SHA256_LINUX_AMD64" + end + end + + def install + bin.install "lk" + bin.install_symlink "lk" => "livekit-cli" + + bash_completion.install "autocomplete/bash_autocomplete" => "lk" + fish_completion.install "autocomplete/fish_autocomplete" => "lk.fish" + zsh_completion.install "autocomplete/zsh_autocomplete" => "_lk" + end + + test do + output = shell_output("#{bin}/lk token create --list --api-key key --api-secret secret") + assert_match "valid for (mins): 5", output + assert_match "lk version #{version}", shell_output("#{bin}/lk --version") + end +end diff --git a/Makefile b/Makefile index 0b08d5aa..573fea25 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,9 @@ cli: check_lfs GOOS=windows GOARCH=amd64 go build -ldflags "-w -s" -o bin/lk.exe ./cmd/lk +console: + CGO_ENABLED=1 go build -tags console -ldflags "-w -s" -o bin/lk ./cmd/lk + install: cli ifeq ($(DETECTED_OS),Windows) cp bin/lk.exe $(GOBIN)/lk.exe diff --git a/README.md b/README.md index 0350759d..eb7b8a93 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,30 @@ git clone https://github.com/livekit/livekit-cli && cd livekit-cli make install ``` +### Building with console support + +The `lk agent console` command (voice chat with an agent via mic/speakers) requires native dependencies (PortAudio, WebRTC audio processing) and is built separately with a build tag. + +This repo uses git submodules for vendored native sources. 
Make sure to clone with submodules: + +```shell +git clone --recurse-submodules https://github.com/livekit/livekit-cli && cd livekit-cli +``` + +Or if you've already cloned: + +```shell +git submodule update --init --recursive +``` + +Then build with the `console` tag: + +```shell +make console +``` + +This produces a `bin/lk` binary with console support enabled. + # Usage See `lk --help` for a complete list of subcommands. The `--help` flag can also be used on any subcommand for more information. diff --git a/cmd/lk/agent.go b/cmd/lk/agent.go index e6745d0a..ce7867d7 100644 --- a/cmd/lk/agent.go +++ b/cmd/lk/agent.go @@ -347,6 +347,7 @@ var ( ArgsUsage: "[working-dir]", }, privateLinkCommands, + simulateCommand, }, }, } @@ -561,7 +562,7 @@ func createAgent(ctx context.Context, cmd *cli.Command) error { projectType, err := agentfs.DetectProjectType(os.DirFS(workingDir)) if err != nil { - return fmt.Errorf("unable to determine agent language: %w, please navigate to a directory containing an agent written in a supported language", err) + return noAgentError() } fmt.Printf("Detected agent language [%s]\n", util.Accented(string(projectType))) @@ -748,7 +749,7 @@ func deployAgent(ctx context.Context, cmd *cli.Command) error { projectType, err := agentfs.DetectProjectType(os.DirFS(workingDir)) if err != nil { - return fmt.Errorf("unable to determine agent language: %w, please make sure you are inside a directory containing an agent written in a supported language", err) + return noAgentError() } fmt.Printf("Detected agent language [%s]\n", util.Accented(string(projectType))) @@ -1492,7 +1493,7 @@ func generateAgentDockerfile(ctx context.Context, cmd *cli.Command) error { projectType, err := agentfs.DetectProjectType(os.DirFS(workingDir)) if err != nil { - return fmt.Errorf("unable to determine agent language: %w, please make sure you are inside a directory containing an agent written in a supported language", err) + return noAgentError() } fmt.Printf("Detected agent 
language [%s]\n", util.Accented(string(projectType))) diff --git a/cmd/lk/agent_reload.go b/cmd/lk/agent_reload.go new file mode 100644 index 00000000..703e63ea --- /dev/null +++ b/cmd/lk/agent_reload.go @@ -0,0 +1,99 @@ +package main + +import ( + "fmt" + "net" + "sync" + "time" + + agent "github.com/livekit/protocol/livekit/agent" + + "github.com/livekit/livekit-cli/v2/pkg/ipc" +) + +// reloadServer manages the dev-mode reload protocol between Go and Python processes. +// Flow: +// 1. Go → old Python: GetRunningJobsRequest → receives GetRunningJobsResponse (capture) +// 2. New Python → Go: GetRunningJobsRequest → Go replies with saved GetRunningJobsResponse (restore) +type reloadServer struct { + listener *ipc.Listener + mu sync.Mutex + savedJobs *agent.GetRunningAgentJobsResponse +} + +func newReloadServer() (*reloadServer, error) { + ln, err := ipc.Listen("127.0.0.1:0") + if err != nil { + return nil, fmt.Errorf("reload server: %w", err) + } + return &reloadServer{listener: ln}, nil +} + +func (rs *reloadServer) addr() string { + return rs.listener.Addr().String() +} + +// captureJobs sends GetRunningJobsRequest to the old Python process and stores the response. 
+func (rs *reloadServer) captureJobs(conn net.Conn) {
+	// Bound the whole request/response exchange to 1.5s so a hung process
+	// cannot stall the reload; the deadline is cleared again on return.
+	conn.SetDeadline(time.Now().Add(1500 * time.Millisecond))
+	defer conn.SetDeadline(time.Time{})
+
+	req := &agent.AgentDevMessage{
+		Message: &agent.AgentDevMessage_GetRunningJobsRequest{
+			GetRunningJobsRequest: &agent.GetRunningAgentJobsRequest{},
+		},
+	}
+	// Capture is best-effort: failures are reported and the reload continues
+	// without saved jobs from this exchange.
+	// NOTE(review): these diagnostics go to stdout while the callers in this
+	// package print their status lines to stderr — confirm that is intended.
+	if err := ipc.WriteProto(conn, req); err != nil {
+		fmt.Printf("reload: failed to send capture request: %v\n", err)
+		return
+	}
+
+	resp := &agent.AgentDevMessage{}
+	if err := ipc.ReadProto(conn, resp); err != nil {
+		fmt.Printf("reload: failed to read capture response: %v\n", err)
+		return
+	}
+
+	// Any other message type in the reply is silently ignored.
+	if jobs := resp.GetGetRunningJobsResponse(); jobs != nil {
+		rs.mu.Lock()
+		rs.savedJobs = jobs
+		rs.mu.Unlock()
+		fmt.Printf("reload: captured %d running job(s)\n", len(jobs.Jobs))
+	}
+}
+
+// serveNewProcess handles a GetRunningJobsRequest from the new Python process,
+// replying with the previously captured jobs.
+//
+// It reads exactly one message from conn and returns without replying when the
+// read fails or the message is not a GetRunningJobsRequest.
+func (rs *reloadServer) serveNewProcess(conn net.Conn) {
+	req := &agent.AgentDevMessage{}
+	if err := ipc.ReadProto(conn, req); err != nil {
+		return
+	}
+	if req.GetGetRunningJobsRequest() == nil {
+		return
+	}
+
+	// Hand the saved jobs out at most once: the field is cleared under the
+	// same lock that reads it.
+	rs.mu.Lock()
+	saved := rs.savedJobs
+	rs.savedJobs = nil
+	rs.mu.Unlock()
+
+	// Nothing captured: still answer, with an empty response.
+	if saved == nil {
+		saved = &agent.GetRunningAgentJobsResponse{}
+	}
+
+	resp := &agent.AgentDevMessage{
+		Message: &agent.AgentDevMessage_GetRunningJobsResponse{
+			GetRunningJobsResponse: saved,
+		},
+	}
+	if err := ipc.WriteProto(conn, resp); err != nil {
+		fmt.Printf("reload: failed to send restore response: %v\n", err)
+	} else if len(saved.Jobs) > 0 {
+		fmt.Printf("reload: restored %d job(s) to new process\n", len(saved.Jobs))
+	}
+}
+
+// close shuts down the reload listener and returns its Close error.
+func (rs *reloadServer) close() error {
+	return rs.listener.Close()
+}
diff --git a/cmd/lk/agent_run.go b/cmd/lk/agent_run.go
new file mode 100644
index 00000000..483c36de
--- /dev/null
+++ b/cmd/lk/agent_run.go
@@ -0,0 +1,287 @@
+// Copyright 2025 LiveKit, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"sync"
+	"syscall"
+
+	"github.com/urfave/cli/v3"
+
+	"github.com/livekit/livekit-cli/v2/pkg/agentfs"
+)
+
+// init registers the start and dev subcommands on the first entry of
+// AgentCommands.
+func init() {
+	AgentCommands[0].Commands = append(AgentCommands[0].Commands, startCommand, devCommand)
+}
+
+// agentRunFlags are the flags shared by the start and dev subcommands.
+// url, api-key and api-secret can also be supplied through the LIVEKIT_URL,
+// LIVEKIT_API_KEY and LIVEKIT_API_SECRET environment variables.
+var agentRunFlags = []cli.Flag{
+	&cli.StringFlag{
+		Name:  "entrypoint",
+		Usage: "Agent entrypoint `FILE` (default: auto-detect)",
+	},
+	&cli.StringFlag{
+		Name:    "url",
+		Usage:   "LiveKit server `URL`",
+		Sources: cli.EnvVars("LIVEKIT_URL"),
+	},
+	&cli.StringFlag{
+		Name:    "api-key",
+		Usage:   "LiveKit API `KEY`",
+		Sources: cli.EnvVars("LIVEKIT_API_KEY"),
+	},
+	&cli.StringFlag{
+		Name:    "api-secret",
+		Usage:   "LiveKit API `SECRET`",
+		Sources: cli.EnvVars("LIVEKIT_API_SECRET"),
+	},
+	&cli.StringFlag{
+		Name:  "log-level",
+		Usage: "Log level (TRACE, DEBUG, INFO, WARN, ERROR)",
+	},
+}
+
+// startCommand runs the agent once via runAgentStart.
+var startCommand = &cli.Command{
+	Name:   "start",
+	Usage:  "Run an agent in production mode",
+	Flags:  agentRunFlags,
+	Action: runAgentStart,
+}
+
+// devCommand runs the agent via runAgentDev; it takes the shared flags plus
+// --no-reload.
+var devCommand = &cli.Command{
+	Name:  "dev",
+	Usage: "Run an agent in development mode with auto-reload",
+	// agentRunFlags is a slice literal (len == cap), so this append copies
+	// instead of writing into the shared slice.
+	Flags: append(agentRunFlags, &cli.BoolFlag{
+		Name:  "no-reload",
+		Usage: "Disable auto-reload on file changes",
+	}),
+	Action: runAgentDev,
+}
+
+// resolveCredentials returns CLI args (--url, --api-key, --api-secret) for the agent subprocess.
+func resolveCredentials(cmd *cli.Command, loadOpts ...loadOption) ([]string, error) {
+	url := cmd.String("url")
+	apiKey := cmd.String("api-key")
+	apiSecret := cmd.String("api-secret")
+
+	// Flags (and their env sources) win; the project config only fills in
+	// values that are still empty.
+	if url == "" || apiKey == "" || apiSecret == "" {
+		opts := append([]loadOption{ignoreURL}, loadOpts...)
+		pc, err := loadProjectDetails(cmd, opts...)
+		if err != nil {
+			return nil, err
+		}
+		if pc != nil {
+			if url == "" {
+				url = pc.URL
+			}
+			if apiKey == "" {
+				apiKey = pc.APIKey
+			}
+			if apiSecret == "" {
+				apiSecret = pc.APISecret
+			}
+		}
+	}
+
+	// Only emit the flags that ended up with a value.
+	var args []string
+	if url != "" {
+		args = append(args, "--url", url)
+	}
+	if apiKey != "" {
+		args = append(args, "--api-key", apiKey)
+	}
+	if apiSecret != "" {
+		args = append(args, "--api-secret", apiSecret)
+	}
+	return args, nil
+}
+
+// noAgentError returns the user-facing error shown when no agent project can
+// be detected.
+func noAgentError() error {
+	return fmt.Errorf("no agent project detected in the current directory\n\n" +
+		"Make sure you are running this command from an agent project directory\n" +
+		"containing one of: pyproject.toml, requirements.txt, uv.lock, package.json, or lock files.\n\n" +
+		"To get started, see: https://docs.livekit.io/agents/quickstart")
+}
+
+// detectProject locates the agent project and its entrypoint.
+//
+// With --entrypoint set, detection starts from the directory containing that
+// file and the returned entrypoint is its path relative to the project root;
+// otherwise detection starts from "." and the entrypoint comes from
+// findEntrypoint. It returns the project directory, the project type and the
+// entrypoint. Only Python projects are accepted.
+func detectProject(cmd *cli.Command) (string, agentfs.ProjectType, string, error) {
+	explicit := cmd.String("entrypoint")
+
+	detectFrom := "."
+	var absEntry string
+	if explicit != "" {
+		var err error
+		absEntry, err = filepath.Abs(explicit)
+		if err != nil {
+			return "", "", "", err
+		}
+		// Keep the underlying cause (permission denied, not a directory, ...)
+		// instead of reporting every failure as a bare "not found".
+		if _, err := os.Stat(absEntry); err != nil {
+			return "", "", "", fmt.Errorf("entrypoint file not found: %s: %w", explicit, err)
+		}
+		detectFrom = filepath.Dir(absEntry)
+	}
+
+	projectDir, projectType, err := agentfs.DetectProjectRoot(detectFrom)
+	if err != nil {
+		return "", "", "", noAgentError()
+	}
+	if !projectType.IsPython() {
+		return "", "", "", fmt.Errorf("currently only supports Python agents (detected: %s)", projectType)
+	}
+
+	if explicit != "" {
+		// filepath.Rel succeeds for paths outside projectDir (it yields a
+		// "../" path), so the escape has to be checked explicitly.
+		rel, err := filepath.Rel(projectDir, absEntry)
+		if err != nil || !filepath.IsLocal(rel) {
+			return "", "", "", fmt.Errorf("entrypoint %s is outside project root %s", explicit, projectDir)
+		}
+		return projectDir, projectType, rel, nil
+	}
+
+	entrypoint, err := findEntrypoint(projectDir, "", projectType)
+	if err != nil {
+		return "", "", "", err
+	}
+	return projectDir, projectType, entrypoint, nil
+}
+
+// buildCLIArgs assembles the argument list passed to the agent subprocess:
+// the subcommand, an optional --log-level, then the resolved credentials.
+func buildCLIArgs(subcmd string, cmd *cli.Command, loadOpts ...loadOption) ([]string, error) {
+	args := []string{subcmd}
+	if logLevel := cmd.String("log-level"); logLevel != "" {
+		args = append(args, "--log-level", logLevel)
+	}
+	creds, err := resolveCredentials(cmd, loadOpts...)
+	if err != nil {
+		return nil, err
+	}
+	args = append(args, creds...)
+	return args, nil
+}
+
+// takeOverSignals replaces the global SIGINT/SIGTERM handling with a private
+// channel. The returned stop function unregisters the channel and closes it,
+// which ends any `for range` loop reading from it; call it exactly once.
+func takeOverSignals() (<-chan os.Signal, func()) {
+	signal.Reset(syscall.SIGINT, syscall.SIGTERM)
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+	return sigCh, func() {
+		// After Stop returns no further signals are delivered to sigCh, so
+		// closing it cannot race with a send.
+		signal.Stop(sigCh)
+		close(sigCh)
+	}
+}
+
+// runAgentStart runs the detected agent once and blocks until it exits,
+// forwarding SIGINT/SIGTERM to it.
+func runAgentStart(ctx context.Context, cmd *cli.Command) error {
+	projectDir, projectType, entrypoint, err := detectProject(cmd)
+	if err != nil {
+		return err
+	}
+	fmt.Fprintf(os.Stderr, "Detected %s agent (%s in %s)\n", projectType.Lang(), entrypoint, projectDir)
+
+	cliArgs, err := buildCLIArgs("start", cmd, quietOutput)
+	if err != nil {
+		return err
+	}
+
+	agent, err := startAgent(AgentStartConfig{
+		Dir:           projectDir,
+		Entrypoint:    entrypoint,
+		ProjectType:   projectType,
+		CLIArgs:       cliArgs,
+		ForwardOutput: os.Stdout,
+	})
+	if err != nil {
+		return err
+	}
+
+	// Take over signal handling from the global NotifyContext.
+	sigCh, stopSignals := takeOverSignals()
+	defer stopSignals()
+
+	// Forward every signal to the agent — Python decides
+	// first = graceful shutdown, second = force exit.
+	go func() {
+		for range sigCh {
+			agent.Shutdown()
+		}
+	}()
+
+	// Wait for agent to exit
+	<-agent.exitCh
+	return nil
+}
+
+// runAgentDev runs the detected agent in development mode. Unless --no-reload
+// is given, the agent is restarted whenever a watched source file changes.
+// The log level defaults to DEBUG when --log-level is not set.
+func runAgentDev(ctx context.Context, cmd *cli.Command) error {
+	projectDir, projectType, entrypoint, err := detectProject(cmd)
+	if err != nil {
+		return err
+	}
+
+	cliArgs, err := buildCLIArgs("start", cmd, outputToStderr)
+	if err != nil {
+		return err
+	}
+	if cmd.String("log-level") == "" {
+		cliArgs = append(cliArgs, "--log-level", "DEBUG")
+	}
+
+	cfg := AgentStartConfig{
+		Dir:           projectDir,
+		Entrypoint:    entrypoint,
+		ProjectType:   projectType,
+		CLIArgs:       cliArgs,
+		ForwardOutput: os.Stdout,
+	}
+
+	fmt.Fprintf(os.Stderr, "Detected %s agent (%s in %s)\n", projectType.Lang(), entrypoint, projectDir)
+
+	// Take over signal handling from the global NotifyContext. The deferred
+	// stop also covers the early error returns below.
+	sigCh, stopSignals := takeOverSignals()
+	defer stopSignals()
+
+	if cmd.Bool("no-reload") {
+		// No reload — just run like start
+		agent, err := startAgent(cfg)
+		if err != nil {
+			return err
+		}
+
+		go func() {
+			for range sigCh {
+				agent.Shutdown()
+			}
+		}()
+
+		<-agent.exitCh
+		return nil
+	}
+
+	// Dev mode with file watching
+	watcher, err := newAgentWatcher(cfg)
+	if err != nil {
+		return err
+	}
+
+	done := make(chan struct{})
+	var doneOnce sync.Once
+
+	// Forward signals to the current agent, and stop the watcher on first signal.
+	// The agent is read through the watcher's lock because the Run loop swaps
+	// it on every reload.
+	go func() {
+		for range sigCh {
+			doneOnce.Do(func() { close(done) })
+			watcher.shutdownAgent()
+		}
+	}()
+
+	return watcher.Run(done)
+}
diff --git a/cmd/lk/agent_watcher.go b/cmd/lk/agent_watcher.go
new file mode 100644
index 00000000..69af9a3a
--- /dev/null
+++ b/cmd/lk/agent_watcher.go
@@ -0,0 +1,242 @@
+// Copyright 2021-2024 LiveKit, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"fmt"
+	"net"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/fsnotify/fsnotify"
+	"github.com/livekit/livekit-cli/v2/pkg/agentfs"
+)
+
+// skipDirs are directories to never watch.
+var skipDirs = map[string]bool{
+	".git": true, ".hg": true, ".svn": true,
+	"__pycache__": true, ".mypy_cache": true, ".pytest_cache": true, ".ruff_cache": true,
+	".venv": true, "venv": true, "env": true,
+	"node_modules": true, ".next": true, "dist": true, "build": true,
+}
+
+// shouldSkipDir reports whether a directory with the given base name is
+// excluded from watching: anything in skipDirs, plus every dot-directory.
+func shouldSkipDir(name string) bool {
+	return skipDirs[name] || (strings.HasPrefix(name, ".") && name != ".")
+}
+
+// addWatchDirs registers root and every non-skipped directory below it with w.
+// root itself is always watched, whatever its name, so that a project living
+// in a directory called e.g. "build" or ".agent" is not silently ignored.
+func addWatchDirs(w *fsnotify.Watcher, root string) error {
+	return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			// Unreadable entries are ignored; watching is best effort.
+			return nil
+		}
+		if !info.IsDir() {
+			return nil
+		}
+		if path != root && shouldSkipDir(info.Name()) {
+			return filepath.SkipDir
+		}
+		return w.Add(path)
+	})
+}
+
+// watchExtensions returns file extensions to watch for a project type.
+func watchExtensions(pt agentfs.ProjectType) map[string]bool {
+	if pt.IsPython() {
+		return map[string]bool{".py": true}
+	}
+	return map[string]bool{".js": true, ".ts": true, ".mjs": true, ".mts": true}
+}
+
+// agentWatcher watches for file changes and restarts an agent subprocess.
+type agentWatcher struct {
+	config    AgentStartConfig
+	exts      map[string]bool
+	debounce  time.Duration
+	watcher   *fsnotify.Watcher
+	restartCh chan struct{}
+
+	reloadSrv *reloadServer
+
+	// mu guards agent and conn. Both are written by the Run loop and by the
+	// reload-accept goroutines, and agent is read by the signal forwarder in
+	// runAgentDev.
+	mu    sync.Mutex
+	agent *AgentProcess
+	conn  net.Conn
+}
+
+// newAgentWatcher creates a watcher over config.Dir and a reload server, and
+// appends --reload-addr to the agent's CLI args. It does not start the agent.
+func newAgentWatcher(config AgentStartConfig) (*agentWatcher, error) {
+	w, err := fsnotify.NewWatcher()
+	if err != nil {
+		return nil, fmt.Errorf("failed to create file watcher: %w", err)
+	}
+
+	// Walk directory tree and add all non-skip directories
+	if err := addWatchDirs(w, config.Dir); err != nil {
+		w.Close()
+		return nil, fmt.Errorf("failed to setup file watcher: %w", err)
+	}
+
+	rs, err := newReloadServer()
+	if err != nil {
+		w.Close()
+		return nil, err
+	}
+
+	// Append --reload-addr to CLI args so the Python process connects back.
+	// The full slice expression caps the capacity, forcing append to copy
+	// rather than write into the caller's backing array.
+	n := len(config.CLIArgs)
+	config.CLIArgs = append(config.CLIArgs[:n:n], "--reload-addr", rs.addr())
+
+	return &agentWatcher{
+		config:    config,
+		exts:      watchExtensions(config.ProjectType),
+		debounce:  500 * time.Millisecond,
+		watcher:   w,
+		restartCh: make(chan struct{}, 1),
+		reloadSrv: rs,
+	}, nil
+}
+
+// currentAgent returns the agent process currently owned by the watcher, or
+// nil if none has been started yet.
+func (aw *agentWatcher) currentAgent() *AgentProcess {
+	aw.mu.Lock()
+	defer aw.mu.Unlock()
+	return aw.agent
+}
+
+// shutdownAgent asks the current agent, if any, to shut down. It is safe to
+// call from a goroutine other than the one running Run.
+func (aw *agentWatcher) shutdownAgent() {
+	if agent := aw.currentAgent(); agent != nil {
+		agent.Shutdown()
+	}
+}
+
+// takeConn detaches and returns the current reload connection (nil if none).
+func (aw *agentWatcher) takeConn() net.Conn {
+	aw.mu.Lock()
+	defer aw.mu.Unlock()
+	conn := aw.conn
+	aw.conn = nil
+	return conn
+}
+
+// launch starts a new agent process, records it as the current one, and
+// accepts its reload connection in the background.
+func (aw *agentWatcher) launch() error {
+	agent, err := startAgent(aw.config)
+	if err != nil {
+		return err
+	}
+	aw.mu.Lock()
+	aw.agent = agent
+	aw.mu.Unlock()
+
+	// Accept the connection from the new process in the background. Accept
+	// fails, ending the goroutine, once the reload listener is closed.
+	go func() {
+		conn, err := aw.reloadSrv.listener.Accept()
+		if err != nil {
+			return
+		}
+		aw.mu.Lock()
+		aw.conn = conn
+		aw.mu.Unlock()
+		// Serve the restore request (empty on first start).
+		// NOTE(review): if the process never sends that request, this read is
+		// still pending when restart() runs captureJobs on the same conn —
+		// confirm the protocol guarantees the request is sent first.
+		aw.reloadSrv.serveNewProcess(conn)
+	}()
+
+	return nil
+}
+
+// start launches the first agent process.
+func (aw *agentWatcher) start() error {
+	return aw.launch()
+}
+
+// restart captures running jobs from the current process (best-effort), kills
+// it, and launches a replacement. If the launch fails the killed process
+// remains recorded as the current agent.
+func (aw *agentWatcher) restart() error {
+	// 1. Capture active jobs from the current process (best-effort)
+	if conn := aw.takeConn(); conn != nil {
+		aw.reloadSrv.captureJobs(conn)
+		conn.Close()
+	}
+
+	// 2. Kill old process
+	if agent := aw.currentAgent(); agent != nil {
+		agent.Kill()
+	}
+
+	fmt.Fprintln(os.Stderr, "Reloading agent...")
+
+	// 3. Start new process; 4. accept its connection and serve restored jobs
+	return aw.launch()
+}
+
+// watchNewDir adds path, and any non-skipped directories below it, to the
+// watch list if path is a directory that passes the skip rules.
+func (aw *agentWatcher) watchNewDir(path string) {
+	info, err := os.Stat(path)
+	if err != nil || !info.IsDir() || shouldSkipDir(info.Name()) {
+		return
+	}
+	// Best effort: a directory that cannot be watched must not stop the loop.
+	_ = addWatchDirs(aw.watcher, path)
+}
+
+// cleanup stops the current agent (waiting up to 5s before force-killing it)
+// and releases the reload connection, reload server and file watcher.
+func (aw *agentWatcher) cleanup() {
+	if agent := aw.currentAgent(); agent != nil {
+		// If Shutdown() was already called by the signal forwarder,
+		// just wait for exit. Otherwise send SIGINT ourselves.
+		// NOTE(review): shutdownCalled is read here without any
+		// synchronization visible in this file — confirm AgentProcess
+		// makes that safe.
+		if !agent.shutdownCalled {
+			agent.Shutdown()
+		}
+		select {
+		case <-agent.exitCh:
+		case <-time.After(5 * time.Second):
+			agent.ForceKill()
+		}
+	}
+	if conn := aw.takeConn(); conn != nil {
+		conn.Close()
+	}
+	aw.reloadSrv.close()
+	aw.watcher.Close()
+}
+
+// Run watches for file changes and restarts the agent. Blocks until done is closed.
+// It returns an error only when the initial start fails; the watcher's
+// resources are released on every return path.
+func (aw *agentWatcher) Run(done <-chan struct{}) error {
+	// Registered before start() so a failed start still closes the fsnotify
+	// watcher and the reload listener.
+	defer aw.cleanup()
+
+	if err := aw.start(); err != nil {
+		return err
+	}
+
+	var (
+		debounceTimer *time.Timer
+		debounceCh    <-chan time.Time
+	)
+
+	// exitCh is the exit notification of the current agent. It is set to nil
+	// once observed so the select below does not fire again for the same
+	// exit, and re-armed after each successful restart.
+	exitCh := aw.currentAgent().exitCh
+
+	for {
+		select {
+		case <-done:
+			return nil
+
+		case event, ok := <-aw.watcher.Events:
+			if !ok {
+				return nil
+			}
+			// Add new directories to the watch list. This has to happen
+			// before the extension filter: directory names normally carry no
+			// watched extension and would be dropped by it.
+			if event.Op&fsnotify.Create != 0 {
+				aw.watchNewDir(event.Name)
+			}
+			// Only trigger on relevant file extensions
+			if !aw.exts[filepath.Ext(event.Name)] {
+				continue
+			}
+			// Only care about writes, creates, renames
+			if event.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Rename) == 0 {
+				continue
+			}
+			// Start or reset debounce timer
+			if debounceTimer == nil {
+				debounceTimer = time.NewTimer(aw.debounce)
+				debounceCh = debounceTimer.C
+			} else {
+				debounceTimer.Reset(aw.debounce)
+			}
+
+		case <-debounceCh:
+			debounceTimer = nil
+			debounceCh = nil
+			if err := aw.restart(); err != nil {
+				fmt.Fprintf(os.Stderr, "Failed to restart agent: %v\n", err)
+				fmt.Fprintln(os.Stderr, "Waiting for file changes...")
+				// The old process was killed and nothing replaced it; there
+				// is no exit left to report.
+				exitCh = nil
+				continue
+			}
+			exitCh = aw.currentAgent().exitCh
+
+		case err, ok := <-aw.watcher.Errors:
+			if !ok {
+				return nil
+			}
+			fmt.Fprintf(os.Stderr, "Watcher error: %v\n", err)
+
+		case <-exitCh:
+			// Agent crashed — wait for file changes to restart
+			exitCh = nil
+			fmt.Fprintln(os.Stderr, "Agent exited. Waiting for file changes to restart...")
+			// Drain any pending debounce
+			if debounceTimer != nil {
+				debounceTimer.Stop()
+				debounceTimer = nil
+				debounceCh = nil
+			}
+		}
+	}
+}
diff --git a/cmd/lk/console.go b/cmd/lk/console.go
new file mode 100644
index 00000000..51d62f59
--- /dev/null
+++ b/cmd/lk/console.go
@@ -0,0 +1,288 @@
+//go:build console
+
+// Copyright 2025 LiveKit, Inc.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "io" + "log" + "net" + "os" + "os/signal" + "strings" + "syscall" + "time" + + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + "github.com/urfave/cli/v3" + + "github.com/livekit/livekit-cli/v2/pkg/console" + "github.com/livekit/livekit-cli/v2/pkg/portaudio" +) + +func init() { + AgentCommands[0].Commands = append(AgentCommands[0].Commands, consoleCommand) +} + +var consoleCommand = &cli.Command{ + Name: "console", + Usage: "Voice chat with an agent via mic/speakers", + Category: "Core", + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "port", + Aliases: []string{"p"}, + Usage: "TCP port for agent communication", + Value: 0, + }, + &cli.StringFlag{ + Name: "input-device", + Usage: "Input device index or name substring", + }, + &cli.StringFlag{ + Name: "output-device", + Usage: "Output device index or name substring", + }, + &cli.BoolFlag{ + Name: "list-devices", + Usage: "List available audio devices and exit", + }, + &cli.BoolFlag{ + Name: "no-aec", + Usage: "Disable acoustic echo cancellation", + }, + &cli.BoolFlag{ + Name: "text", + Aliases: []string{"t"}, + Usage: "Start in text mode instead of audio mode", + }, + &cli.BoolFlag{ + Name: "record", + Usage: "Record audio and session report to console-recordings/", + }, + &cli.StringFlag{ + Name: "entrypoint", + Usage: "Agent entrypoint `FILE` (default: auto-detect)", + }, + }, + 
Action: runConsole, +} + +func runConsole(ctx context.Context, cmd *cli.Command) error { + textMode := cmd.Bool("text") + + var inputDev, outputDev *portaudio.DeviceInfo + if !textMode { + if err := portaudio.Initialize(); err != nil { + return fmt.Errorf("failed to initialize PortAudio: %w", err) + } + defer portaudio.Terminate() + + if cmd.Bool("list-devices") { + return listDevices() + } + + var err error + if q := cmd.String("input-device"); q != "" { + inputDev, err = portaudio.FindDevice(q, true) + } else { + inputDev, err = portaudio.DefaultInputDevice() + } + if err != nil { + return fmt.Errorf("input device: %w", err) + } + + if q := cmd.String("output-device"); q != "" { + outputDev, err = portaudio.FindDevice(q, false) + } else { + outputDev, err = portaudio.DefaultOutputDevice() + } + if err != nil { + return fmt.Errorf("output device: %w", err) + } + } + + port := cmd.Int("port") + addr := fmt.Sprintf("127.0.0.1:%d", port) + var err error + server, err := console.NewTCPServer(addr) + if err != nil { + return err + } + defer server.Close() + + actualAddr := server.Addr().String() + if inputDev != nil { + fmt.Fprintf(os.Stderr, "Input: %s\n", inputDev.Name) + fmt.Fprintf(os.Stderr, "Output: %s\n", outputDev.Name) + } + + projectDir, projectType, entrypoint, err := detectProject(cmd) + if err != nil { + return err + } + + fmt.Fprintf(os.Stderr, "Detected %s agent (%s in %s)\n", projectType.Lang(), entrypoint, projectDir) + + // Show spinner while starting agent + stopSpinner := startSpinner("Starting agent") + agentProc, err := startAgent(AgentStartConfig{ + Dir: projectDir, + Entrypoint: entrypoint, + ProjectType: projectType, + CLIArgs: buildConsoleArgs(actualAddr, cmd.Bool("record")), + }) + if err != nil { + stopSpinner() + return fmt.Errorf("failed to start agent: %w", err) + } + defer agentProc.Kill() + + // Stream agent logs to the TUI + agentProc.LogStream = make(chan string, 128) + + // Wait for TCP connection, agent crash, timeout, or 
cancellation + type acceptResult struct { + conn net.Conn + err error + } + acceptCh := make(chan acceptResult, 1) + go func() { + conn, err := server.Accept() + acceptCh <- acceptResult{conn, err} + }() + + var conn net.Conn + select { + case res := <-acceptCh: + stopSpinner() + if res.err != nil { + return fmt.Errorf("agent connection: %w", res.err) + } + conn = res.conn + case err := <-agentProc.Done(): + stopSpinner() + logs := agentProc.RecentLogs(20) + for _, l := range logs { + fmt.Fprintln(os.Stderr, l) + } + if err != nil { + return fmt.Errorf("agent exited before connecting: %w", err) + } + return fmt.Errorf("agent exited before connecting") + case <-time.After(60 * time.Second): + stopSpinner() + logs := agentProc.RecentLogs(20) + for _, l := range logs { + fmt.Fprintln(os.Stderr, l) + } + return fmt.Errorf("timed out waiting for agent to connect") + case <-ctx.Done(): + stopSpinner() + return ctx.Err() + } + pipeline, err := console.NewPipeline(console.PipelineConfig{ + InputDevice: inputDev, + OutputDevice: outputDev, + NoAEC: cmd.Bool("no-aec"), + Conn: conn, + }) + if err != nil { + return fmt.Errorf("pipeline: %w", err) + } + + pipelineCtx, pipelineCancel := context.WithCancel(ctx) + defer pipelineCancel() + + go func() { + pipeline.Start(pipelineCtx) + }() + + // Redirect Go's default logger to discard so it doesn't corrupt the TUI + log.SetOutput(io.Discard) + + // Remove the global SIGINT handler (from signal.NotifyContext in main.go) + // so that ctrl+C in raw mode reaches Bubble Tea as a key event, and after + // the TUI exits, a ctrl+C during cleanup uses the default handler (terminate). 
+ signal.Reset(syscall.SIGINT) + + var inputDevName, outputDevName string + if inputDev != nil { + inputDevName = inputDev.Name + } + if outputDev != nil { + outputDevName = outputDev.Name + } + model := newConsoleModel(pipeline, pipelineCancel, agentProc, inputDevName, outputDevName, textMode) + p := tea.NewProgram(model, tea.WithoutSignalHandler()) + + if _, err := p.Run(); err != nil { + return err + } + + return nil +} + +func buildConsoleArgs(addr string, record bool) []string { + args := []string{"console", "--connect-addr", addr} + if record { + args = append(args, "--record") + } + return args +} + +func listDevices() error { + devices, err := portaudio.ListDevices() + if err != nil { + return err + } + + headerStyle := lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("6")) + defaultStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("2")) + + fmt.Println(headerStyle.Render(fmt.Sprintf(" %-4s %-8s %-45s %s", "#", "Type", "Name", "Default"))) + fmt.Println(strings.Repeat("─", 70)) + + for _, d := range devices { + devType := "" + if d.MaxInputChannels > 0 && d.MaxOutputChannels > 0 { + devType = "Both" + } else if d.MaxInputChannels > 0 { + devType = "Input" + } else { + devType = "Output" + } + + defStr := "" + if d.IsDefaultInput { + defStr += defaultStyle.Render("✓ input") + } + if d.IsDefaultOutput { + if defStr != "" { + defStr += " " + } + defStr += defaultStyle.Render("✓ output") + } + + fmt.Printf(" %-4d %-8s %-45s %s\n", d.Index, devType, d.Name, defStr) + } + + return nil +} + diff --git a/cmd/lk/console_stub.go b/cmd/lk/console_stub.go new file mode 100644 index 00000000..452bf181 --- /dev/null +++ b/cmd/lk/console_stub.go @@ -0,0 +1,24 @@ +//go:build !console + +package main + +import ( + "context" + "fmt" + + "github.com/urfave/cli/v3" +) + +func init() { + AgentCommands[0].Commands = append(AgentCommands[0].Commands, &cli.Command{ + Name: "console", + Usage: "Voice chat with an agent via mic/speakers", + Action: func(ctx 
context.Context, cmd *cli.Command) error { + return fmt.Errorf("console is not included in this build (requires -tags console).\n\n" + + "Install with console support:\n" + + " https://docs.livekit.io/intro/basics/cli/start/\n\n" + + "Or build from source:\n" + + " go build -tags console ./cmd/lk") + }, + }) +} diff --git a/cmd/lk/console_tui.go b/cmd/lk/console_tui.go new file mode 100644 index 00000000..cd4f5c70 --- /dev/null +++ b/cmd/lk/console_tui.go @@ -0,0 +1,702 @@ +//go:build console + +// Copyright 2025 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + "time" + + "github.com/charmbracelet/bubbles/textinput" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + + agent "github.com/livekit/protocol/livekit/agent" + + "github.com/livekit/livekit-cli/v2/pkg/console" +) + +// Console-specific styles (tagStyle, greenStyle, redStyle, dimStyle, boldStyle, cyanStyle +// are inherited from simulate_tui.go which is always compiled) +var ( + lkCyan = lipgloss.Color("#1fd5f9") + lkPurple = lipgloss.Color("#8f83ff") + lkGreen = lipgloss.Color("#6BCB77") + lkRed = lipgloss.Color("#EF4444") + + labelStyle = lipgloss.NewStyle().Foreground(lkPurple) + cyanBoldStyle = lipgloss.NewStyle().Foreground(lkCyan).Bold(true) + greenBoldStyle = lipgloss.NewStyle().Foreground(lkGreen).Bold(true) + redBoldStyle = lipgloss.NewStyle().Foreground(lkRed).Bold(true) +) + +// Unicode block characters for frequency visualizer (matching Python console) +var blocks = []string{"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"} + +// Braille spinner frames (matching Rich's "dots" spinner) +var spinnerFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"} + +// startSpinner shows a braille spinner on stderr with the given message. +// Returns a stop function that clears the spinner line. 
+func startSpinner(msg string) func() { + done := make(chan struct{}) + go func() { + i := 0 + for { + select { + case <-done: + fmt.Fprintf(os.Stderr, "\r\033[K") + return + default: + fmt.Fprintf(os.Stderr, "\r %s %s", spinnerFrames[i%len(spinnerFrames)], msg) + i++ + time.Sleep(80 * time.Millisecond) + } + } + }() + return func() { close(done) } +} + +type consoleTickMsg struct{} +type sessionEventMsg struct{ event *agent.AgentSessionEvent } +type sessionResponseMsg struct{ resp *agent.SessionResponse } +type audioInitResultMsg struct{ err error } +type agentLogMsg struct{ line string } +type agentExitedMsg struct{} +type shutdownTimeoutMsg struct{} + +type consoleModel struct { + pipeline *console.AudioPipeline + pipelineCancel context.CancelFunc + agentProc *AgentProcess + inputDev string + outputDev string + + width int + + // Partial user transcription (not yet final) + partialTranscript string + + // Text mode + textMode bool + textInput textinput.Model + + // Shortcut help toggle (? 
key) + showShortcuts bool + + // Audio init error (shown when switching from text to audio fails) + audioError string + + // Last turn metrics text (cleared on next thinking state) + metricsText string + + // Request counter for unique IDs + reqCounter int + + // Waiting for agent response (text mode loading indicator) + waitingForAgent bool + + // Shutdown state + shuttingDown bool +} + +func newConsoleModel(pipeline *console.AudioPipeline, pipelineCancel context.CancelFunc, agentProc *AgentProcess, inputDev, outputDev string, textMode bool) consoleModel { + ti := textinput.New() + ti.Placeholder = "Type to talk to your agent" + ti.CharLimit = 1000 + ti.Width = 60 + ti.Prompt = "❯ " + ti.PromptStyle = boldStyle + + if textMode { + ti.Focus() + } + + return consoleModel{ + pipeline: pipeline, + pipelineCancel: pipelineCancel, + agentProc: agentProc, + inputDev: inputDev, + outputDev: outputDev, + textInput: ti, + textMode: textMode, + } +} + +func (m consoleModel) Init() tea.Cmd { + cmds := []tea.Cmd{ + consoleTickCmd(), + pollEventsCmd(m.pipeline), + pollResponsesCmd(m.pipeline), + } + if m.agentProc != nil && m.agentProc.LogStream != nil { + cmds = append(cmds, pollLogsCmd(m.agentProc.LogStream)) + } + if m.textMode { + cmds = append(cmds, textinput.Blink) + } + return tea.Batch(cmds...) 
+} + +func consoleTickCmd() tea.Cmd { + return tea.Tick(80*time.Millisecond, func(t time.Time) tea.Msg { + return consoleTickMsg{} + }) +} + +func pollEventsCmd(pipeline *console.AudioPipeline) tea.Cmd { + return func() tea.Msg { + ev, ok := <-pipeline.Events + if !ok { + return nil + } + return sessionEventMsg{event: ev} + } +} + +func pollResponsesCmd(pipeline *console.AudioPipeline) tea.Cmd { + return func() tea.Msg { + resp, ok := <-pipeline.Responses + if !ok { + return nil + } + return sessionResponseMsg{resp: resp} + } +} + +func pollLogsCmd(ch chan string) tea.Cmd { + return func() tea.Msg { + line, ok := <-ch + if !ok { + return nil + } + return agentLogMsg{line: line} + } +} + +func (m consoleModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + case tea.KeyMsg: + if m.shuttingDown { + if msg.String() == "ctrl+c" { + m.agentProc.ForceKill() + m.pipelineCancel() + go m.pipeline.Stop() + return m, tea.Quit + } + return m, nil + } + if m.textMode { + return m.updateTextMode(msg) + } + switch msg.String() { + case "q", "ctrl+c": + return m, m.beginShutdown() + case "m": + m.pipeline.SetMuted(!m.pipeline.Muted()) + case "ctrl+t": + m.textMode = true + m.showShortcuts = false + m.textInput.Focus() + return m, textinput.Blink + case "?": + m.showShortcuts = !m.showShortcuts + case "esc": + m.showShortcuts = false + } + + case tea.WindowSizeMsg: + m.width = msg.Width + + case consoleTickMsg: + if m.shuttingDown { + return m, nil + } + return m, consoleTickCmd() + + case sessionEventMsg: + if m.shuttingDown { + return m, nil + } + cmds := m.handleSessionEvent(msg.event) + cmds = append(cmds, pollEventsCmd(m.pipeline)) + return m, tea.Batch(cmds...) 
+ + case sessionResponseMsg: + if m.waitingForAgent { + m.waitingForAgent = false + if m.textMode { + m.textInput.Focus() + } + } + return m, pollResponsesCmd(m.pipeline) + + case audioInitResultMsg: + if msg.err != nil { + m.audioError = msg.err.Error() + } else { + m.textMode = false + m.showShortcuts = false + m.textInput.Blur() + m.audioError = "" + m.inputDev = "Default Input" + m.outputDev = "Default Output" + } + return m, nil + + case agentLogMsg: + cmd := tea.Println(dimStyle.Render(msg.line)) + var nextCmd tea.Cmd + if m.agentProc != nil && m.agentProc.LogStream != nil { + nextCmd = pollLogsCmd(m.agentProc.LogStream) + } + return m, tea.Batch(cmd, nextCmd) + + case agentExitedMsg: + return m, tea.Quit + + case shutdownTimeoutMsg: + m.agentProc.ForceKill() + m.pipelineCancel() + go m.pipeline.Stop() + return m, tea.Quit + } + + return m, nil +} + +func (m *consoleModel) switchToAudio() tea.Cmd { + if m.pipeline.HasAudio() { + m.textMode = false + m.showShortcuts = false + m.textInput.Blur() + m.audioError = "" + return nil + } + // Lazy init audio in a goroutine + return func() tea.Msg { + return audioInitResultMsg{err: m.pipeline.EnableAudio()} + } +} + +func (m *consoleModel) beginShutdown() tea.Cmd { + m.shuttingDown = true + m.textMode = false + m.showShortcuts = false + + // Close the audio pipeline/TCP connection first so the agent's audio + // input ends and STT stops receiving data. Then send SIGINT so the + // agent's session.aclose() runs with nothing left to drain. + m.pipelineCancel() + go m.pipeline.Stop() + + m.agentProc.Shutdown() + + // Wait for agent exit or timeout. 
+ return tea.Batch( + func() tea.Msg { + <-m.agentProc.Done() + return agentExitedMsg{} + }, + tea.Tick(5*time.Second, func(time.Time) tea.Msg { + return shutdownTimeoutMsg{} + }), + ) +} + +func (m *consoleModel) updateTextMode(msg tea.KeyMsg) (tea.Model, tea.Cmd) { + switch msg.String() { + case "ctrl+c": + return m, m.beginShutdown() + case "ctrl+t": + return m, m.switchToAudio() + case "esc": + if m.showShortcuts { + m.showShortcuts = false + return m, nil + } + return m, m.switchToAudio() + case "?": + if m.textInput.Value() == "" { + m.showShortcuts = !m.showShortcuts + return m, nil + } + case "enter": + if m.waitingForAgent { + return m, nil + } + text := strings.TrimSpace(m.textInput.Value()) + if text != "" { + m.reqCounter++ + reqID := fmt.Sprintf("console-%d", m.reqCounter) + m.textInput.SetValue("") + m.waitingForAgent = true + + // Print user message matching the old console format: + // ● You + // text here + printCmd := tea.Println( + "\n " + lipgloss.NewStyle().Foreground(lkCyan).Render("● ") + + cyanBoldStyle.Render("You") + + "\n " + text + "\n", + ) + + req := &agent.SessionRequest{ + RequestId: reqID, + Request: &agent.SessionRequest_RunInput_{ + RunInput: &agent.SessionRequest_RunInput{Text: text}, + }, + } + go m.pipeline.SendRequest(req) + return m, tea.Batch(printCmd, consoleTickCmd()) + } + return m, nil + } + + m.audioError = "" // clear on any key press + var cmd tea.Cmd + m.textInput, cmd = m.textInput.Update(msg) + return m, cmd +} + +func (m *consoleModel) handleSessionEvent(ev *agent.AgentSessionEvent) []tea.Cmd { + if ev == nil { + return nil + } + var cmds []tea.Cmd + + switch e := ev.Event.(type) { + case *agent.AgentSessionEvent_AgentStateChanged_: + if e.AgentStateChanged.NewState == agent.AgentState_AS_THINKING { + m.metricsText = "" + } + + case *agent.AgentSessionEvent_UserInputTranscribed_: + if e.UserInputTranscribed.IsFinal { + m.partialTranscript = "" + if text := e.UserInputTranscribed.Transcript; text != "" { + cmds = 
append(cmds, tea.Println( + "\n "+lipgloss.NewStyle().Foreground(lkCyan).Render("● ")+ + cyanBoldStyle.Render("You")+ + "\n "+text+"\n", + )) + } + } else { + m.partialTranscript = e.UserInputTranscribed.Transcript + } + + case *agent.AgentSessionEvent_ConversationItemAdded_: + if item := e.ConversationItemAdded.Item; item != nil { + // Extract metrics from ChatMessage (matching Python console pattern) + if msg := item.GetMessage(); msg != nil { + if text := formatMetrics(msg.Metrics); text != "" { + m.metricsText = text + } + } + cmds = append(cmds, tea.Println(formatChatItem(item))) + } + + case *agent.AgentSessionEvent_FunctionToolsExecuted_: + ft := e.FunctionToolsExecuted + outputsByCallID := make(map[string]*agent.FunctionCallOutput) + for _, fco := range ft.FunctionCallOutputs { + outputsByCallID[fco.CallId] = fco + } + var b strings.Builder + for i, fc := range ft.FunctionCalls { + if i > 0 { + b.WriteString("\n") + } + b.WriteString("\n ") + b.WriteString("● ") + b.WriteString("function_tool: ") + b.WriteString(fc.Name) + if fco, ok := outputsByCallID[fc.CallId]; ok { + if fco.IsError { + b.WriteString("\n ") + b.WriteString(redBoldStyle.Render("✗ ")) + b.WriteString(redStyle.Render(truncateOutput(fco.Output))) + } else { + b.WriteString("\n ") + b.WriteString(greenStyle.Render("✓ ")) + b.WriteString(dimStyle.Render(summarizeOutput(fco.Output))) + } + } + } + b.WriteString("\n") + cmds = append(cmds, tea.Println(b.String())) + + case *agent.AgentSessionEvent_Error_: + cmds = append(cmds, tea.Println( + " "+redBoldStyle.Render("✗ ")+redStyle.Render(e.Error.Message), + )) + } + + return cmds +} + +func formatChatItem(item *agent.ChatContext_ChatItem) string { + switch i := item.Item.(type) { + case *agent.ChatContext_ChatItem_Message: + msg := i.Message + if msg.Role == agent.ChatRole_USER { + return "" + } + var textParts []string + for _, c := range msg.Content { + if t := c.GetText(); t != "" { + textParts = append(textParts, t) + } + } + text := 
strings.Join(textParts, "") + if text == "" { + return "" + } + + var b strings.Builder + b.WriteString("\n ") + b.WriteString(lipgloss.NewStyle().Foreground(lkGreen).Render("● ")) + b.WriteString(greenBoldStyle.Render("Agent")) + for _, tl := range strings.Split(text, "\n") { + b.WriteString("\n ") + b.WriteString(tl) + } + b.WriteString("\n") + return b.String() + + case *agent.ChatContext_ChatItem_AgentHandoff: + h := i.AgentHandoff + old := "" + if h.OldAgentId != nil && *h.OldAgentId != "" { + old = dimStyle.Render(*h.OldAgentId) + " → " + } + return " " + lipgloss.NewStyle().Foreground(lkPurple).Render("● ") + + dimStyle.Render("handoff: ") + old + labelStyle.Render(h.NewAgentId) + } + return "" +} + +// ────────────────────────────────────────────────────────────────── +// View — compact status area at the bottom (not fullscreen). +// Logs and conversation scroll up via tea.Println. +// Layout matches the old Python console (FrequencyVisualizer + prompt). +// ────────────────────────────────────────────────────────────────── + +func (m consoleModel) View() string { + var b strings.Builder + + if m.shuttingDown { + b.WriteString("\n ") + b.WriteString(labelStyle.Render("Shutting down agent...")) + b.WriteString(" ") + b.WriteString(dimStyle.Render("ctrl+C to force")) + b.WriteString("\n") + return b.String() + } + + if m.textMode { + if m.waitingForAgent { + // Braille spinner (matching Rich's "dots" spinner) + frame := spinnerFrames[int(time.Now().UnixMilli()/80)%len(spinnerFrames)] + b.WriteString(" " + dimStyle.Render(frame+" thinking")) + } else { + // ── Text input ── + w := m.width + if w <= 0 { + w = 80 + } + sep := dimStyle.Render(strings.Repeat("─", min(w, 80))) + b.WriteString(sep) + b.WriteString("\n") + b.WriteString(m.textInput.View()) + b.WriteString("\n") + b.WriteString(sep) + } + + if m.audioError != "" { + b.WriteString("\n") + b.WriteString(" " + redStyle.Render("audio: "+m.audioError)) + } + + if m.showShortcuts { + b.WriteString("\n") + 
m.writeShortcutsInline(&b, []shortcut{ + {"Ctrl+T", "audio mode"}, + {"Ctrl+C", "exit"}, + }) + } else { + b.WriteString("\n") + b.WriteString(dimStyle.Render(" ? for shortcuts")) + } + } else { + // ── Audio visualizer (matching old Python FrequencyVisualizer) ── + b.WriteString(" ") + b.WriteString(labelStyle.Render(m.inputDev)) + b.WriteString(" ") + bands := m.pipeline.FFTBands() + for _, band := range bands { + idx := int(band * float64(len(blocks)-1)) + if idx >= len(blocks) { + idx = len(blocks) - 1 + } + if idx < 0 { + idx = 0 + } + b.WriteString(" ") + b.WriteString(blocks[idx]) + } + + if m.pipeline.Muted() { + b.WriteString(" ") + b.WriteString(redBoldStyle.Render("MUTED")) + } + + // Partial transcription on same line (dim) + if m.partialTranscript != "" { + b.WriteString(" ") + b.WriteString(dimStyle.Render("● " + m.partialTranscript + "...")) + } + + // ERLE > 6dB means the AEC is actively cancelling echo — show as a + // reassuring status indicator, not a warning. + if m.pipeline.IsPlaying() { + if stats := m.pipeline.AECStats(); stats != nil && stats.HasERLE && stats.EchoReturnLossEnhancement > 2 { + b.WriteString(" ") + b.WriteString(dimStyle.Render("echo cancelling")) + } + } + + // Metrics on same line (right side) + if m.metricsText != "" { + b.WriteString(" ") + b.WriteString(m.metricsText) + } + + if m.showShortcuts { + b.WriteString("\n") + m.writeShortcutsInline(&b, []shortcut{ + {"m", "mute/unmute"}, + {"Ctrl+T", "text mode"}, + {"q", "quit"}, + }) + } else { + b.WriteString("\n") + b.WriteString(dimStyle.Render(" ? 
// formatMs renders a duration given in seconds as milliseconds with an "ms"
// suffix: no decimals from 100ms upwards, one decimal below that.
func formatMs(seconds float64) string {
	millis := seconds * 1000

	decimals := 1
	if millis >= 100 {
		decimals = 0
	}
	return fmt.Sprintf("%.*fms", decimals, millis)
}
// summarizeOutput tries to parse the JSON object that starts at the first "{"
// in output and produce a "key=value, key=value" summary of up to three
// fields, matching the old Python console behavior. Fields whose value is
// null and the "type" field are skipped. ", ..." is appended when the object
// has more than three keys.
//
// Fields are listed in the order they appear in the JSON text, so the summary
// is stable across calls. When there is no "{", the JSON does not parse, or no
// field is left to show, it falls back to truncateOutput(output).
func summarizeOutput(output string) string {
	jsonStart := strings.Index(output, "{")
	if jsonStart < 0 {
		return truncateOutput(output)
	}
	raw := output[jsonStart:]

	var data map[string]any
	if err := json.Unmarshal([]byte(raw), &data); err != nil {
		return truncateOutput(output)
	}

	const maxFields = 3
	parts := make([]string, 0, maxFields)
	// Go map iteration order is random; walk the keys in document order.
	for _, k := range objectKeysInOrder(raw) {
		v := data[k]
		if v == nil || k == "type" {
			continue
		}
		parts = append(parts, fmt.Sprintf("%s=%v", k, v))
		if len(parts) >= maxFields {
			break
		}
	}
	// Decide on the fallback before adding the ellipsis, otherwise an object
	// with only skipped fields would be summarized as just ", ...".
	if len(parts) == 0 {
		return truncateOutput(output)
	}

	result := strings.Join(parts, ", ")
	if len(data) > maxFields {
		result += ", ..."
	}
	return result
}

// objectKeysInOrder returns the distinct top-level keys of the JSON object in
// raw, in the order they first appear. It stops at the first token it cannot
// read and returns the keys collected so far.
func objectKeysInOrder(raw string) []string {
	dec := json.NewDecoder(strings.NewReader(raw))
	if _, err := dec.Token(); err != nil { // consume the opening '{'
		return nil
	}

	var keys []string
	seen := make(map[string]bool)
	for dec.More() {
		tok, err := dec.Token()
		if err != nil {
			return keys
		}
		key, ok := tok.(string)
		if !ok {
			return keys
		}
		// Skip over the value, whatever its shape, without interpreting it.
		var skipped json.RawMessage
		if err := dec.Decode(&skipped); err != nil {
			return keys
		}
		if !seen[key] {
			seen[key] = true
			keys = append(keys, key)
		}
	}
	return keys
}

// truncateOutput limits output to at most 200 bytes. Longer input is cut and
// suffixed with "...". The cut is moved back to the start of a UTF-8 sequence
// so that a multi-byte character is never split in half.
func truncateOutput(output string) string {
	const (
		limit    = 200
		ellipsis = "..."
	)
	if len(output) <= limit {
		return output
	}

	cut := limit - len(ellipsis)
	// Bytes of the form 10xxxxxx continue a multi-byte character; if the
	// first dropped byte is one of them, the cut is inside that character.
	for cut > 0 && output[cut]&0xC0 == 0x80 {
		cut--
	}
	return output[:cut] + ellipsis
}
+// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "encoding/json" + "fmt" + "math/rand" + "os" + "time" + + tea "github.com/charmbracelet/bubbletea" + "github.com/urfave/cli/v3" + + "github.com/livekit/livekit-cli/v2/pkg/agentfs" + "github.com/livekit/livekit-cli/v2/pkg/config" + "github.com/livekit/protocol/livekit" + lksdk "github.com/livekit/server-sdk-go/v2" +) + +var ( + simulateProjectConfig *config.ProjectConfig +) + +var simulateCommand = &cli.Command{ + Name: "simulate", + Usage: "Run agent simulations against LiveKit Cloud", + Before: func(ctx context.Context, cmd *cli.Command) (context.Context, error) { + pc, err := loadProjectDetails(cmd) + if err != nil { + return nil, err + } + simulateProjectConfig = pc + return nil, nil + }, + Action: runSimulate, + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "num-simulations", + Aliases: []string{"n"}, + Usage: "Number of scenarios to generate", + Value: 5, + }, + &cli.StringFlag{ + Name: "description", + Usage: "Agent description for scenario generation", + }, + &cli.StringFlag{ + Name: "scenario-group-id", + Usage: "Use a pre-configured scenario group", + }, + &cli.StringFlag{ + Name: "config", + Usage: "Path to simulation config `FILE`", + }, + &cli.StringFlag{ + Name: "entrypoint", + Usage: "Agent entrypoint `FILE` (default: agent.py)", + }, + }, +} + +// simulationConfig represents the simulation.json config file. 
// generateAgentName returns "simulation-" followed by eight characters drawn
// at random from the lowercase letters and digits.
func generateAgentName() string {
	const (
		prefix    = "simulation-"
		alphabet  = "abcdefghijklmnopqrstuvwxyz0123456789"
		suffixLen = 8
	)

	name := make([]byte, 0, len(prefix)+suffixLen)
	name = append(name, prefix...)
	// Fill the remaining capacity, one random character at a time.
	for len(name) < cap(name) {
		name = append(name, alphabet[rand.Intn(len(alphabet))])
	}
	return string(name)
}
type simulateMode int

// The modes are declared in the same priority order in which runSimulate
// checks for them; the first one whose condition holds is used.
const (
	// modeInlineScenarios is selected when the config file defines at least
	// one scenario.
	modeInlineScenarios simulateMode = iota
	// modeScenarioGroup is selected when --scenario-group-id is set.
	modeScenarioGroup
	// modeGenerateFromDescription is selected when an agent description is
	// available, from --description or from the config file.
	modeGenerateFromDescription
	// modeGenerateFromSource is the fallback when none of the above applies.
	modeGenerateFromSource
)
m.agent.LogPath != "" { + fmt.Fprintf(os.Stderr, "Agent logs: %s\n", m.agent.LogPath) + } + } + + if url := m.getDashboardURL(); url != "" { + fmt.Fprintf(os.Stderr, "Dashboard: %s\n", url) + } + + // Cancel the run — server will no-op if already terminal + if m.runID != "" && !m.runFinished { + cancelCtx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelFn() + if _, err := simClient.CancelSimulationRun(cancelCtx, &livekit.SimulationRun_Cancel_Request{ + SimulationRunId: m.runID, + }); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to cancel run: %v\n", err) + } else { + fmt.Fprintf(os.Stderr, "Run cancelled\n") + } + } + + if m.err != nil && m.err != context.Canceled { + return m.err + } + return nil +} + + diff --git a/cmd/lk/simulate_subprocess.go b/cmd/lk/simulate_subprocess.go new file mode 100644 index 00000000..4e7c9985 --- /dev/null +++ b/cmd/lk/simulate_subprocess.go @@ -0,0 +1,324 @@ +// Copyright 2025 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "syscall" + "time" + + "github.com/livekit/livekit-cli/v2/pkg/agentfs" +) + +// AgentProcess manages a Python agent subprocess. 
+type AgentProcess struct { + cmd *exec.Cmd + readyCh chan struct{} + doneCh chan error + exitCh chan struct{} // closed when process exits, safe to read multiple times + shutdownCalled bool // true after Shutdown() sends SIGINT + + // LogStream receives log lines in real-time. Nil if not needed. + LogStream chan string + + mu sync.Mutex + logLines []string + maxLogs int + logFile *os.File + LogPath string +} + +// findPythonBinary locates a Python binary for the given project type. +func findPythonBinary(dir string, projectType agentfs.ProjectType) (string, []string, error) { + if projectType == agentfs.ProjectTypePythonUV { + uvPath, err := exec.LookPath("uv") + if err == nil { + return uvPath, []string{"run", "python"}, nil + } + } + + // Check common venv locations + for _, venvDir := range []string{".venv", "venv"} { + candidate := filepath.Join(dir, venvDir, "bin", "python") + if _, err := os.Stat(candidate); err == nil { + return candidate, nil, nil + } + } + + // Fall back to system python + pythonPath, err := exec.LookPath("python3") + if err != nil { + pythonPath, err = exec.LookPath("python") + if err != nil { + return "", nil, fmt.Errorf("could not find Python binary; ensure a virtual environment exists or Python is on PATH") + } + } + return pythonPath, nil, nil +} + +// findEntrypoint resolves the agent entrypoint file. +func findEntrypoint(dir, explicit string, projectType agentfs.ProjectType) (string, error) { + if explicit != "" { + path := explicit + if !filepath.IsAbs(path) { + path = filepath.Join(dir, path) + } + if _, err := os.Stat(path); err != nil { + return "", fmt.Errorf("entrypoint file not found: %s", explicit) + } + return explicit, nil + } + def := projectType.DefaultEntrypoint() + if def == "" { + def = "agent.py" + } + + // Check project root first + checked := []string{filepath.Join(dir, def)} + if _, err := os.Stat(checked[0]); err == nil { + return def, nil + } + + // Fall back to cwd-relative path (e.g. 
running from examples/drive-thru/) + cwd, _ := os.Getwd() + if rel, err := filepath.Rel(dir, cwd); err == nil && rel != "." { + candidate := filepath.Join(rel, def) + absCandidate := filepath.Join(dir, candidate) + checked = append(checked, absCandidate) + if _, err := os.Stat(absCandidate); err == nil { + return candidate, nil + } + } + + msg := "no agent entrypoint found, checked:\n" + for _, p := range checked { + msg += fmt.Sprintf(" - %s\n", p) + } + msg += "\nMake sure you are running this command from a directory containing a LiveKit agent.\n" + msg += "Use --entrypoint to specify the agent entrypoint file." + return "", fmt.Errorf("%s", msg) +} + +// AgentStartConfig configures how to launch an agent subprocess. +type AgentStartConfig struct { + Dir string + Entrypoint string + ProjectType agentfs.ProjectType + CLIArgs []string // e.g. ["start", "--url", "..."] or ["console", "--connect-addr", addr] + Env []string // e.g. ["LIVEKIT_AGENT_NAME=x"] or nil + ReadySignal string // substring to scan for in output (e.g. "registered worker"), empty to skip + ForwardOutput io.Writer // if set, forward each output line to this writer +} + +// startAgent launches a Python agent subprocess and monitors its output. +func startAgent(cfg AgentStartConfig) (*AgentProcess, error) { + pythonBin, prefixArgs, err := findPythonBinary(cfg.Dir, cfg.ProjectType) + if err != nil { + return nil, err + } + + args := append(prefixArgs, cfg.Entrypoint) + args = append(args, cfg.CLIArgs...) + cmd := exec.Command(pythonBin, args...) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Dir = cfg.Dir + if len(cfg.Env) > 0 { + cmd.Env = append(os.Environ(), cfg.Env...) 
+ } + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("failed to create stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, fmt.Errorf("failed to create stderr pipe: %w", err) + } + + logFile, err := os.CreateTemp("", "lk-simulate-*.log") + if err != nil { + return nil, fmt.Errorf("failed to create log file: %w", err) + } + + ap := &AgentProcess{ + cmd: cmd, + readyCh: make(chan struct{}), + doneCh: make(chan error, 1), + exitCh: make(chan struct{}), + maxLogs: 200, + logFile: logFile, + LogPath: logFile.Name(), + } + + if err := cmd.Start(); err != nil { + logFile.Close() + os.Remove(logFile.Name()) + return nil, fmt.Errorf("failed to start agent: %w", err) + } + + // Capture output from both stdout and stderr + readyOnce := sync.Once{} + scanOutput := func(r io.Reader) { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for scanner.Scan() { + line := scanner.Text() + ap.appendLog(line) + if cfg.ForwardOutput != nil { + fmt.Fprintln(cfg.ForwardOutput, line) + } + if cfg.ReadySignal != "" && strings.Contains(line, cfg.ReadySignal) { + readyOnce.Do(func() { close(ap.readyCh) }) + } + } + } + + // If no ready signal, mark ready immediately + if cfg.ReadySignal == "" { + close(ap.readyCh) + } + + var scanWg sync.WaitGroup + scanWg.Add(2) + go func() { defer scanWg.Done(); scanOutput(stdout) }() + go func() { defer scanWg.Done(); scanOutput(stderr) }() + go func() { + ap.doneCh <- cmd.Wait() + close(ap.exitCh) + scanWg.Wait() + if ap.LogStream != nil { + close(ap.LogStream) + } + }() + + return ap, nil +} + +func (ap *AgentProcess) appendLog(line string) { + ap.mu.Lock() + defer ap.mu.Unlock() + ap.logLines = append(ap.logLines, line) + if len(ap.logLines) > ap.maxLogs { + ap.logLines = ap.logLines[len(ap.logLines)-ap.maxLogs:] + } + if ap.logFile != nil { + fmt.Fprintln(ap.logFile, line) + } + if ap.LogStream != nil { + select { + case ap.LogStream <- line: + 
default: + } + } +} + +// Ready returns a channel that is closed when the agent worker has registered. +func (ap *AgentProcess) Ready() <-chan struct{} { + return ap.readyCh +} + +// Done returns a channel that receives the process exit error. +func (ap *AgentProcess) Done() <-chan error { + return ap.doneCh +} + +// RecentLogs returns the last n log lines from the subprocess. +func (ap *AgentProcess) RecentLogs(n int) []string { + ap.mu.Lock() + defer ap.mu.Unlock() + if n >= len(ap.logLines) { + result := make([]string, len(ap.logLines)) + copy(result, ap.logLines) + return result + } + result := make([]string, n) + copy(result, ap.logLines[len(ap.logLines)-n:]) + return result +} + +// LogCount returns the total number of log lines captured. +func (ap *AgentProcess) LogCount() int { + ap.mu.Lock() + defer ap.mu.Unlock() + return len(ap.logLines) +} + +// Kill sends SIGINT to the process group and SIGKILL after a timeout. +// If Shutdown() was already called, it just waits for exit (no duplicate SIGINT). +func (ap *AgentProcess) Kill() { + if ap.cmd.Process == nil { + return + } + // Already exited — nothing to do. + select { + case <-ap.exitCh: + ap.closeLogFile() + return + default: + } + if !ap.shutdownCalled { + ap.signalGroup(syscall.SIGINT) + } + select { + case <-ap.exitCh: + case <-time.After(5 * time.Second): + ap.signalGroup(syscall.SIGKILL) + } + ap.closeLogFile() +} + +func (ap *AgentProcess) closeLogFile() { + ap.mu.Lock() + defer ap.mu.Unlock() + if ap.logFile != nil { + ap.logFile.Close() + ap.logFile = nil + } +} + +// Shutdown sends SIGINT to the main process to initiate graceful shutdown. +// Only signals the main process (not the group) so that Python manages +// its own child process cleanup without stray signal bouncing. +func (ap *AgentProcess) Shutdown() { + if ap.cmd.Process == nil { + return + } + ap.shutdownCalled = true + ap.cmd.Process.Signal(syscall.SIGINT) +} + +// ForceKill sends SIGKILL to the process group immediately. 
+func (ap *AgentProcess) ForceKill() { + if ap.cmd.Process == nil { + return + } + ap.signalGroup(syscall.SIGKILL) +} + +// signalGroup sends a signal to the entire process group (Setpgid must be true). +func (ap *AgentProcess) signalGroup(sig syscall.Signal) { + if ap.cmd.Process == nil { + return + } + // Negative PID signals the entire process group. + _ = syscall.Kill(-ap.cmd.Process.Pid, sig) +} diff --git a/cmd/lk/simulate_tui.go b/cmd/lk/simulate_tui.go new file mode 100644 index 00000000..2fa2a2b4 --- /dev/null +++ b/cmd/lk/simulate_tui.go @@ -0,0 +1,1267 @@ +// Copyright 2025 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "bytes" + "context" + "fmt" + "math/rand" + "os" + "strings" + "time" + + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + + "github.com/livekit/livekit-cli/v2/pkg/agentfs" + "github.com/livekit/livekit-cli/v2/pkg/config" + "github.com/livekit/server-sdk-go/v2/pkg/cloudagents" + agent "github.com/livekit/protocol/livekit/agent" + "github.com/livekit/protocol/livekit" + lksdk "github.com/livekit/server-sdk-go/v2" +) + +// --- Styles --- + +var ( + tagStyle = lipgloss.NewStyle().Background(lipgloss.Color("#1fd5f9")).Foreground(lipgloss.Color("#000000")).Bold(true).Padding(0, 1) + greenStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("2")) + redStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("1")) + yellowStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("3")) + dimStyle = lipgloss.NewStyle().Faint(true) + boldStyle = lipgloss.NewStyle().Bold(true) + reverseStyle = lipgloss.NewStyle().Reverse(true) + cyanStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("6")).Bold(true) + + simSpinnerFrames = []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"} +) + +// --- Message types --- + +type simulationRunMsg struct { + run *livekit.SimulationRun + err error +} + +type pollTickMsg struct{} +type spinnerTickMsg struct{} +type glowTickMsg struct{} + +type subprocessExitMsg struct { + err error +} + +// --- Filter --- + +const ( + filterAll = iota + filterFailed + filterPassed + filterRunning +) + +var filterNames = []string{"All", "Failed", "Passed", "Running"} + +// --- Model --- + +type step struct { + label string + status string // "pending", "running", "done", "failed" + elapsed time.Duration +} + +type simulateConfig struct { + ctx context.Context + client *lksdk.AgentSimulationClient + pc *config.ProjectConfig + numSimulations int32 + mode simulateMode + description string + agentName string + projectDir string + projectType agentfs.ProjectType + entrypoint string + cfg *simulationConfig 
+ scenarioGroupID string +} + +type simulateModel struct { + config *simulateConfig + client *lksdk.AgentSimulationClient + runID string + agent *AgentProcess + setupCancel context.CancelFunc + + // Setup phase + steps []step + setupDone bool + + // Run phase + run *livekit.SimulationRun + runFinished bool + numSimulations int32 + startTime time.Time + genStart time.Time + + quoteIdx int + quoteTick int + spinnerIdx int + glowIdx int + + filter int + cursor int + scrollOff int + detailJobID string + showLogs bool + showDescription bool + + width int + height int + err error +} + +type quote struct { + text string + glow bool // iconic quotes get a subtle glow + weight int // higher = more likely to appear +} + +var simulationQuotes = []quote{ + // Iconic — glow sweep, high weight + {"There is no spoon.", true, 5}, // Spoon Boy — The Matrix + {"What is real? How do you define real?", true, 5}, // Morpheus — The Matrix + {"Wake up, Neo.", true, 5}, // Trinity — The Matrix + {"Free your mind.", true, 4}, // Morpheus — The Matrix + {"Welcome to the real world.", true, 4}, // Morpheus — The Matrix + {"Shall we play a game?", true, 4}, // WOPR — WarGames + {"Open the pod bay doors, HAL.", true, 4}, // Dave — 2001: A Space Odyssey + {"The Matrix is everywhere. It is all around us.", true, 3}, // Morpheus — The Matrix + // Well-known — no glow, medium weight + {"Do not try and bend the spoon. That's impossible.", false, 3}, // Spoon Boy — The Matrix + {"The only winning move is not to play.", false, 3}, // WarGames + {"These violent delights have violent ends.", false, 3}, // Westworld + {"I think, therefore I am.", false, 3}, // René Descartes + {"Unfortunately, no one can be told what the Matrix is.", false, 2}, // Morpheus — The Matrix + {"Ever had that feeling where you're not sure if you're awake or still dreaming?", false, 2}, // Neo — The Matrix + {"I can only show you the door. 
You're the one that has to walk through it.", false, 2}, // Morpheus — The Matrix + {"Remember, all I'm offering is the truth. Nothing more.", false, 2}, // Morpheus — The Matrix + // Niche — low weight + {"I don't like the idea that I'm not in control of my life.", false, 1}, // Neo — The Matrix + {"Choice is an illusion created between those with power and those without.", false, 1}, // Merovingian — The Matrix Reloaded + {"The odds that we are in base reality is one in billions.", false, 1}, // Elon Musk + {"The world, then, is a radical illusion.", false, 1}, // Jean Baudrillard + {"That's all it is. Information.", false, 1}, // Ghost in the Shell + {"Not one single bit of it is real.", false, 1}, // The Metamorphosis of Prime Intellect + {"I wish I had a good argument against it.", false, 1}, // Neil deGrasse Tyson + // Playful + {"Warming up the neural pathways...", false, 1}, + {"Reticulating splines...", false, 1}, // SimCity + {"Generating plausible humans...", false, 1}, + {"Convincing the AI to cooperate...", false, 2}, + {"Teaching robots to small talk...", false, 1}, +} + +// weightedQuotePool builds a flat slice with quotes repeated by weight for random selection. 
+var weightedQuotePool = func() []int { + var pool []int + for i, q := range simulationQuotes { + for range q.weight { + pool = append(pool, i) + } + } + return pool +}() + +func newSimulateModel(config *simulateConfig) *simulateModel { + return &simulateModel{ + config: config, + client: config.client, + numSimulations: config.numSimulations, + quoteIdx: weightedQuotePool[rand.Intn(len(weightedQuotePool))], + width: 80, + height: 24, + } +} + +// --- Setup messages --- + +type setupStepMsg struct { + stepIdx int + elapsed []time.Duration // elapsed time per completed step + err error + runID string + agent *AgentProcess +} + +func (m *simulateModel) Init() tea.Cmd { + return tea.Batch( + m.runSetup(), + tickCmd(), + spinnerTickCmd(), + glowTickCmd(), + ) +} + +func (m *simulateModel) runSetup() tea.Cmd { + c := m.config + + // Determine which steps to show + m.steps = []step{ + {label: "Starting agent", status: "running"}, + {label: "Creating simulation", status: "pending"}, + } + if c.mode == modeGenerateFromSource { + m.steps = append(m.steps, step{label: "Uploading source", status: "pending"}) + } + + ctx, cancel := context.WithCancel(c.ctx) + m.setupCancel = cancel + + return func() tea.Msg { + var elapsed []time.Duration + stepStart := time.Now() + + // Step 0: Start agent & wait for registration + agent, err := startAgent(AgentStartConfig{ + Dir: c.projectDir, + Entrypoint: c.entrypoint, + ProjectType: c.projectType, + CLIArgs: []string{ + "start", + "--url", c.pc.URL, + "--api-key", c.pc.APIKey, + "--api-secret", c.pc.APISecret, + }, + Env: []string{ + "LIVEKIT_AGENT_NAME=" + c.agentName, + "LIVEKIT_URL=" + c.pc.URL, + "LIVEKIT_API_KEY=" + c.pc.APIKey, + "LIVEKIT_API_SECRET=" + c.pc.APISecret, + }, + ReadySignal: "registered worker", + }) + if err != nil { + return setupStepMsg{stepIdx: 0, err: fmt.Errorf("failed to start agent: %w", err)} + } + + // Wait for agent ready + timeout := time.NewTimer(10 * time.Second) + defer timeout.Stop() + select { + case 
<-agent.Ready(): + case err := <-agent.Done(): + if err != nil { + return setupStepMsg{stepIdx: 0, err: fmt.Errorf("agent exited before registering: %w", err), agent: agent} + } + return setupStepMsg{stepIdx: 0, err: fmt.Errorf("agent exited before registering"), agent: agent} + case <-timeout.C: + return setupStepMsg{stepIdx: 0, err: fmt.Errorf("timed out waiting for agent to register (10s)"), agent: agent} + case <-ctx.Done(): + return setupStepMsg{stepIdx: 0, err: ctx.Err(), agent: agent} + } + elapsed = append(elapsed, time.Since(stepStart)) + stepStart = time.Now() + + // Step 1: Create simulation run + req := &livekit.SimulationRun_Create_Request{ + AgentName: c.agentName, + AgentDescription: c.description, + NumSimulations: c.numSimulations, + } + switch c.mode { + case modeInlineScenarios: + scenarios := make([]*livekit.SimulationRun_Create_Scenario, 0, len(c.cfg.Scenarios)) + for _, sc := range c.cfg.Scenarios { + scenarios = append(scenarios, &livekit.SimulationRun_Create_Scenario{ + Label: sc.Label, + Instructions: sc.Instructions, + AgentExpectations: sc.AgentExpectations, + Metadata: sc.Metadata, + }) + } + req.Source = &livekit.SimulationRun_Create_Request_Scenarios{ + Scenarios: &livekit.SimulationRun_Create_Scenarios{ + Scenarios: scenarios, + }, + } + case modeScenarioGroup: + req.Source = &livekit.SimulationRun_Create_Request_GroupId{ + GroupId: c.scenarioGroupID, + } + } + + resp, err := c.client.CreateSimulationRun(ctx, req) + if err != nil { + return setupStepMsg{stepIdx: 1, err: fmt.Errorf("failed to create simulation: %w", err), agent: agent} + } + elapsed = append(elapsed, time.Since(stepStart)) + stepStart = time.Now() + runID := resp.SimulationRunId + + // Step 2: Upload source (if needed) + if c.mode == modeGenerateFromSource { + presigned := resp.PresignedPostRequest + if presigned == nil { + return setupStepMsg{stepIdx: 2, err: fmt.Errorf("server did not return upload URL"), agent: agent, runID: runID} + } + + sourceDir, _ := os.Getwd() 
+ var buf bytes.Buffer + if err := cloudagents.CreateSourceTarball(os.DirFS(sourceDir), nil, &buf); err != nil { + return setupStepMsg{stepIdx: 2, err: fmt.Errorf("failed to create source archive: %w", err), agent: agent, runID: runID} + } + if err := cloudagents.MultipartUpload(presigned.Url, presigned.Values, &buf); err != nil { + return setupStepMsg{stepIdx: 2, err: fmt.Errorf("failed to upload source: %w", err), agent: agent, runID: runID} + } + if _, err := c.client.ConfirmSimulationSourceUpload(ctx, &livekit.SimulationRun_ConfirmSourceUpload_Request{ + SimulationRunId: runID, + }); err != nil { + return setupStepMsg{stepIdx: 2, err: fmt.Errorf("failed to confirm upload: %w", err), agent: agent, runID: runID} + } + elapsed = append(elapsed, time.Since(stepStart)) + } + + // All done + lastStep := len(m.steps) - 1 + return setupStepMsg{stepIdx: lastStep, elapsed: elapsed, agent: agent, runID: runID} + } +} + +func tickCmd() tea.Cmd { + return tea.Tick(time.Second, func(t time.Time) tea.Msg { + return pollTickMsg{} + }) +} + +func spinnerTickCmd() tea.Cmd { + return tea.Tick(80*time.Millisecond, func(t time.Time) tea.Msg { + return spinnerTickMsg{} + }) +} + +func glowTickCmd() tea.Cmd { + return tea.Tick(40*time.Millisecond, func(t time.Time) tea.Msg { + return glowTickMsg{} + }) +} + +func (m *simulateModel) pollSimulation() tea.Cmd { + return func() tea.Msg { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + resp, err := m.client.GetSimulationRun(ctx, &livekit.SimulationRun_Get_Request{ + SimulationRunId: m.runID, + }) + if err != nil { + return simulationRunMsg{err: err} + } + return simulationRunMsg{run: resp.Run} + } +} + +func (m *simulateModel) waitSubprocess() tea.Cmd { + if m.agent == nil { + return nil + } + return func() tea.Msg { + err := <-m.agent.Done() + return subprocessExitMsg{err: err} + } +} + +func (m *simulateModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + case 
tea.WindowSizeMsg: + m.width = msg.Width + m.height = msg.Height + + case setupStepMsg: + if msg.agent != nil { + m.agent = msg.agent + } + if msg.runID != "" { + m.runID = msg.runID + } + if msg.err != nil { + // Mark current step as failed + if msg.stepIdx < len(m.steps) { + m.steps[msg.stepIdx].status = "failed" + } + m.err = msg.err + m.setupDone = true + m.runFinished = true + return m, nil + } + // Mark all steps up to and including this one as done + for i := 0; i <= msg.stepIdx && i < len(m.steps); i++ { + m.steps[i].status = "done" + if i < len(msg.elapsed) { + m.steps[i].elapsed = msg.elapsed[i] + } + } + // If all steps are done, start polling + if msg.stepIdx >= len(m.steps)-1 { + m.setupDone = true + m.genStart = time.Now() + return m, tea.Batch(m.pollSimulation(), m.waitSubprocess()) + } + // Mark next step as running + if msg.stepIdx+1 < len(m.steps) { + m.steps[msg.stepIdx+1].status = "running" + } + return m, nil + + case simulationRunMsg: + if msg.err == nil && msg.run != nil { + m.run = msg.run + if m.startTime.IsZero() && msg.run.Status == livekit.SimulationRun_STATUS_RUNNING { + m.startTime = time.Now() + } + if msg.run.Status == livekit.SimulationRun_STATUS_COMPLETED || + msg.run.Status == livekit.SimulationRun_STATUS_FAILED || + msg.run.Status == livekit.SimulationRun_STATUS_CANCELLED { + m.runFinished = true + } + } + + case spinnerTickMsg: + m.spinnerIdx++ + return m, spinnerTickCmd() + + case glowTickMsg: + m.glowIdx++ + return m, glowTickCmd() + + case pollTickMsg: + m.quoteTick++ + if m.quoteTick%60 == 0 { + m.quoteIdx = weightedQuotePool[rand.Intn(len(weightedQuotePool))] + } + var cmds []tea.Cmd + if m.setupDone && !m.runFinished { + cmds = append(cmds, m.pollSimulation()) + } + cmds = append(cmds, tickCmd()) + return m, tea.Batch(cmds...) 
+ + case subprocessExitMsg: + // Subprocess exited — don't quit TUI, just note it + + case tea.KeyMsg: + return m.handleKey(msg) + } + return m, nil +} + +func (m *simulateModel) handleKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { + switch msg.String() { + case "ctrl+c": + if m.setupCancel != nil { + m.setupCancel() + } + return m, tea.Quit + case "ctrl+l": + m.showLogs = !m.showLogs + case "d": + if m.detailJobID == "" { + m.showDescription = !m.showDescription + } + case "up", "shift+tab": + m.cursor-- + case "down", "tab": + m.cursor++ + case "pgup": + m.cursor -= 20 + case "pgdown": + m.cursor += 20 + case "left": + m.filter = (m.filter + len(filterNames) - 1) % len(filterNames) + m.cursor = 0 + m.scrollOff = 0 + case "right": + m.filter = (m.filter + 1) % len(filterNames) + m.cursor = 0 + m.scrollOff = 0 + case "enter": + if m.detailJobID == "" { + jobs := m.filteredJobs() + if m.cursor >= 0 && m.cursor < len(jobs) { + m.detailJobID = jobs[m.cursor].job.Id + } + } + case "esc", "backspace": + if m.detailJobID != "" { + m.detailJobID = "" + } + case "q": + if m.detailJobID != "" { + m.detailJobID = "" + } else { + return m, tea.Quit + } + } + return m, nil +} + +type indexedJob struct { + origIdx int + job *livekit.SimulationRun_Job +} + +func (m *simulateModel) filteredJobs() []indexedJob { + if m.run == nil { + return nil + } + var result []indexedJob + for i, j := range m.run.Jobs { + match := false + switch m.filter { + case filterAll: + match = true + case filterFailed: + match = j.Status == livekit.SimulationRun_Job_STATUS_FAILED + case filterPassed: + match = j.Status == livekit.SimulationRun_Job_STATUS_COMPLETED + case filterRunning: + match = j.Status == livekit.SimulationRun_Job_STATUS_RUNNING + } + if match { + result = append(result, indexedJob{origIdx: i + 1, job: j}) + } + } + return result +} + +func (m *simulateModel) View() string { + // Setup phase or generating phase — show unified step view + if !m.setupDone || m.run == nil || m.run.Status == 
livekit.SimulationRun_STATUS_GENERATING { + return m.viewSetup() + } + switch m.run.Status { + case livekit.SimulationRun_STATUS_FAILED: + if len(m.run.Jobs) == 0 { + return m.viewFailed() + } + return m.viewRunning() + default: + return m.viewRunning() + } +} + +func (m *simulateModel) viewSetup() string { + var b strings.Builder + b.WriteString("\n") + b.WriteString(tagStyle.Render("Agent Simulation")) + b.WriteString("\n\n") + + if m.config.pc != nil && m.config.pc.Name != "" { + b.WriteString(dimStyle.Render(" Project: "+m.config.pc.Name) + "\n") + } + if m.config.pc != nil && m.config.pc.URL != "" { + b.WriteString(dimStyle.Render(" URL: "+m.config.pc.URL) + "\n") + } + if m.runID != "" { + b.WriteString(dimStyle.Render(" Run: "+m.runID) + "\n") + } + if url := m.getDashboardURL(); url != "" { + b.WriteString(dimStyle.Render(" "+url) + "\n") + } + b.WriteString("\n") + + b.WriteString(m.renderSteps()) + + // Show generation progress after setup completes + if m.setupDone && m.err == nil { + elapsed := time.Since(m.genStart).Truncate(time.Second) + b.WriteString(fmt.Sprintf(" %s Generating %d scenarios %s %s\n", yellowStyle.Render("●"), m.numSimulations, m.spinner(), dimStyle.Render(elapsed.String()))) + } + + if m.err != nil { + b.WriteString("\n") + b.WriteString(redStyle.Render(" "+m.err.Error()) + "\n") + if m.agent != nil { + b.WriteString("\n") + b.WriteString(m.renderLogs()) + } + b.WriteString("\n") + b.WriteString(dimStyle.Render(" q quit")) + b.WriteString("\n") + } else { + b.WriteString("\n") + if m.showLogs { + b.WriteString(m.renderLogs()) + } + b.WriteString(m.quoteAboveHint(" Ctrl+L logs")) + b.WriteString("\n") + } + return b.String() +} + +func (m *simulateModel) spinner() string { + return yellowStyle.Render(simSpinnerFrames[m.spinnerIdx%len(simSpinnerFrames)]) +} + +var quoteStyleDim = lipgloss.NewStyle().Foreground(lipgloss.Color("237")) + +// glowShades are brightness levels for the sweep effect (dark → bright → dark) +var glowShades = 
[]lipgloss.Color{"237", "239", "242", "245", "248", "245", "242", "239", "237"} + +func (m *simulateModel) quote() string { + q := simulationQuotes[m.quoteIdx] + if !q.glow { + return quoteStyleDim.Render(q.text) + } + // Sweep a bright spot across the text, then stay dark for a long pause + runes := []rune(q.text) + sweepLen := len(runes) + len(glowShades) + cycleLen := sweepLen + 250 // ~10s pause at 40ms tick + center := m.glowIdx % cycleLen + if center >= sweepLen { + // In the pause phase — render all dim + return quoteStyleDim.Render(q.text) + } + var b strings.Builder + for i, r := range runes { + dist := center - i + if dist >= 0 && dist < len(glowShades) { + style := lipgloss.NewStyle().Foreground(glowShades[dist]) + if dist >= 2 && dist <= 6 { // italic only for the brightest chars + style = style.Italic(true) + } + b.WriteString(style.Render(string(r))) + } else { + b.WriteString(lipgloss.NewStyle().Foreground(lipgloss.Color("237")).Render(string(r))) + } + } + return b.String() +} + +func (m *simulateModel) renderSteps() string { + var b strings.Builder + for _, s := range m.steps { + switch s.status { + case "done": + elapsed := "" + if s.elapsed > 0 { + elapsed = " " + dimStyle.Render(s.elapsed.Round(time.Millisecond).String()) + } + b.WriteString(fmt.Sprintf(" %s %s%s\n", greenStyle.Render("✓"), s.label, elapsed)) + case "running": + b.WriteString(fmt.Sprintf(" %s %s\n", yellowStyle.Render("●"), s.label)) + case "failed": + b.WriteString(fmt.Sprintf(" %s %s\n", redStyle.Render("✗"), s.label)) + default: + b.WriteString(fmt.Sprintf(" %s %s\n", dimStyle.Render("○"), s.label)) + } + } + return b.String() +} + +func (m *simulateModel) getDashboardURL() string { + if m.runID == "" || m.config == nil || m.config.pc == nil || m.config.pc.ProjectId == "" { + return "" + } + return fmt.Sprintf("%s/projects/%s/agents/simulations/%s", dashboardURL, m.config.pc.ProjectId, m.runID) +} + +func (m *simulateModel) viewFailed() string { + var b strings.Builder + 
b.WriteString("\n") + b.WriteString(tagStyle.Render("Agent Simulation")) + b.WriteString(" ") + b.WriteString(dimStyle.Render(m.runID)) + b.WriteString("\n\n") + b.WriteString(" " + redStyle.Bold(true).Render("Failed") + "\n\n") + if m.run.Error != "" { + for _, line := range strings.Split(m.run.Error, "\n") { + b.WriteString(redStyle.Render(" "+line) + "\n") + } + } else { + b.WriteString(redStyle.Render(" (no error details available)") + "\n") + } + b.WriteString("\n") + if m.showLogs { + b.WriteString(m.renderLogs()) + } + b.WriteString(dimStyle.Render(" Ctrl+L logs · q quit")) + b.WriteString("\n") + return b.String() +} + +func (m *simulateModel) viewRunning() string { + var b strings.Builder + + b.WriteString("\n") + b.WriteString(tagStyle.Render("Agent Simulation")) + b.WriteString(" ") + b.WriteString(dimStyle.Render(m.runID)) + if url := m.getDashboardURL(); url != "" { + b.WriteString(" " + dimStyle.Render(url)) + } + b.WriteString("\n\n") + + // Agent description + if m.run != nil && m.run.AgentDescription != "" { + b.WriteString(boldStyle.Render(" Agent Description") + "\n") + if m.showDescription { + wrapped := dimStyle.Width(m.width - 4).Render(m.run.AgentDescription) + for _, line := range strings.Split(wrapped, "\n") { + b.WriteString(" " + line + "\n") + } + b.WriteString(dimStyle.Render(" (press d to collapse)") + "\n\n") + } else { + desc := firstMeaningfulLine(m.run.AgentDescription) + if desc != "" { + b.WriteString(dimStyle.Width(m.width-4).Render(" "+desc) + "\n") + b.WriteString(dimStyle.Render(" (press d to expand)") + "\n\n") + } + } + } + + // Header line + b.WriteString(m.renderHeader()) + b.WriteString("\n") + + // Progress counts + b.WriteString(m.renderCounts()) + b.WriteString("\n") + + // Filter tabs + b.WriteString(m.renderFilterTabs()) + b.WriteString("\n\n") + + if m.detailJobID != "" { + b.WriteString(m.renderDetail()) + } else { + b.WriteString(m.renderJobList()) + + // Show summary when run is completed and summary is 
available + if m.run.Summary != nil { + b.WriteString(m.renderSummary()) + } + } + + b.WriteString("\n") + if m.showLogs { + b.WriteString(m.renderLogs()) + } + b.WriteString(m.renderHint()) + b.WriteString("\n") + return b.String() +} + +func (m *simulateModel) renderHeader() string { + var label, style string + switch { + case m.run.Status == livekit.SimulationRun_STATUS_COMPLETED || m.run.Status == livekit.SimulationRun_STATUS_FAILED || m.run.Status == livekit.SimulationRun_STATUS_CANCELLED: + total, done, _, _ := m.jobCounts() + allJobsDone := total > 0 && done == total + if m.run.Status == livekit.SimulationRun_STATUS_CANCELLED { + label = "Cancelled" + style = "yellow" + } else if m.run.Status == livekit.SimulationRun_STATUS_FAILED && !allJobsDone { + label = "Failed" + style = "red" + } else { + label = "Completed" + style = "green" + if allJobsDone && m.run.Summary == nil { + label += " — summary unavailable" + } + } + case m.run.Status == livekit.SimulationRun_STATUS_SUMMARIZING: + label = "Summarizing..." 
+ style = "yellow" + default: + label = "Running" + style = "yellow" + } + + header := boldStyle.Render("Simulation") + " — " + switch style { + case "green": + header += greenStyle.Bold(true).Render(label) + case "red": + header += redStyle.Bold(true).Render(label) + case "yellow": + header += yellowStyle.Bold(true).Render(label) + } + return " " + header +} + +func (m *simulateModel) jobCounts() (total, done, passed, failed int) { + if m.run == nil { + return + } + total = len(m.run.Jobs) + for _, j := range m.run.Jobs { + switch j.Status { + case livekit.SimulationRun_Job_STATUS_COMPLETED: + done++ + passed++ + case livekit.SimulationRun_Job_STATUS_FAILED: + done++ + failed++ + } + } + return +} + +func (m *simulateModel) renderCounts() string { + total, done, passed, failed := m.jobCounts() + running := 0 + if m.run != nil { + for _, j := range m.run.Jobs { + if j.Status == livekit.SimulationRun_Job_STATUS_RUNNING { + running++ + } + } + } + + var parts []string + parts = append(parts, boldStyle.Render(fmt.Sprintf("%d/%d", done, total))) + if passed > 0 { + parts = append(parts, greenStyle.Render(fmt.Sprintf("%d passed", passed))) + } + if failed > 0 { + parts = append(parts, redStyle.Render(fmt.Sprintf("%d failed", failed))) + } + if running > 0 { + parts = append(parts, yellowStyle.Render(fmt.Sprintf("%d running", running))) + } + + elapsed := "" + if !m.startTime.IsZero() { + d := time.Since(m.startTime) + secs := int(d.Seconds()) + mins := secs / 60 + secs = secs % 60 + if mins > 0 { + elapsed = fmt.Sprintf("%dm%02ds", mins, secs) + } else { + elapsed = fmt.Sprintf("%ds", secs) + } + } + + result := " " + strings.Join(parts, " ") + if elapsed != "" { + result += " " + dimStyle.Render(elapsed) + } + return result +} + +func (m *simulateModel) renderFilterTabs() string { + total, _, passed, failed := m.jobCounts() + running := 0 + if m.run != nil { + for _, j := range m.run.Jobs { + if j.Status == livekit.SimulationRun_Job_STATUS_RUNNING { + running++ + } + } 
+ } + + counts := []int{total, failed, passed, running} + styles := []lipgloss.Style{lipgloss.NewStyle(), redStyle, greenStyle, yellowStyle} + + var parts []string + for i, name := range filterNames { + label := fmt.Sprintf("%s: %d", name, counts[i]) + if i == m.filter { + parts = append(parts, styles[i].Bold(true).Render(label)) + } else { + parts = append(parts, dimStyle.Render(label)) + } + } + return " " + strings.Join(parts, " ") +} + +func (m *simulateModel) renderJobList() string { + jobs := m.filteredJobs() + if len(jobs) == 0 { + return dimStyle.Render(" (no jobs match this filter)") + } + + // Clamp cursor + if m.cursor < 0 { + m.cursor = 0 + } + if m.cursor >= len(jobs) { + m.cursor = len(jobs) - 1 + } + + // Compute visible window + availHeight := m.height - 14 + if availHeight < 5 { + availHeight = 5 + } + + if m.cursor < m.scrollOff { + m.scrollOff = m.cursor + } else if m.cursor >= m.scrollOff+availHeight { + m.scrollOff = m.cursor - availHeight + 1 + } + if m.scrollOff < 0 { + m.scrollOff = 0 + } + if m.scrollOff > len(jobs)-availHeight { + m.scrollOff = len(jobs) - availHeight + } + if m.scrollOff < 0 { + m.scrollOff = 0 + } + + winStart := m.scrollOff + winEnd := m.scrollOff + availHeight + if winEnd > len(jobs) { + winEnd = len(jobs) + } + + var b strings.Builder + + if winStart > 0 { + b.WriteString(dimStyle.Render(fmt.Sprintf(" ... %d more above ...", winStart))) + b.WriteString("\n") + } + + for i := winStart; i < winEnd; i++ { + ij := jobs[i] + icon := jobIcon(ij.job) + instr := ij.job.Instructions + if len(instr) > 60 { + instr = instr[:60] + "..." + } + if instr == "" { + instr = "—" + } + + var line string + if i == m.cursor { + // Build without inner styles so reverse applies cleanly + line = fmt.Sprintf(" %s %3d. 
%s %s", icon, ij.origIdx, ij.job.Id, instr) + visible := lipgloss.Width(line) + if visible < m.width { + line += strings.Repeat(" ", m.width-visible) + } + line = reverseStyle.Render(line) + } else { + line = fmt.Sprintf(" %s %3d. %s %s", icon, ij.origIdx, dimStyle.Render(ij.job.Id), instr) + } + b.WriteString(line) + b.WriteString("\n") + } + + remaining := len(jobs) - winEnd + if remaining > 0 { + b.WriteString(dimStyle.Render(fmt.Sprintf(" ... %d more below ...", remaining))) + b.WriteString("\n") + } + + return b.String() +} + +func (m *simulateModel) renderDetail() string { + if m.run == nil { + return "" + } + var job *livekit.SimulationRun_Job + origIdx := 0 + for i, j := range m.run.Jobs { + if j.Id == m.detailJobID { + job = j + origIdx = i + 1 + break + } + } + if job == nil { + m.detailJobID = "" + return dimStyle.Render(" (job not found)\n") + } + + var b strings.Builder + b.WriteString("\n") + b.WriteString(fmt.Sprintf(" %s %s %s\n", + jobIcon(job), + boldStyle.Render(fmt.Sprintf("Job %d", origIdx)), + dimStyle.Render(job.Id), + )) + b.WriteString("\n") + + wrapWidth := m.width - 6 + if wrapWidth < 40 { + wrapWidth = 40 + } + wrapStyle := lipgloss.NewStyle().Width(wrapWidth) + + b.WriteString(boldStyle.Render(" Instructions:")) + b.WriteString("\n") + instr := job.Instructions + if instr == "" { + instr = "—" + } + for _, line := range strings.Split(wrapStyle.Render(instr), "\n") { + b.WriteString(" " + line + "\n") + } + b.WriteString("\n") + + b.WriteString(dimStyle.Bold(true).Render(" Expected:")) + b.WriteString("\n") + expect := job.AgentExpectations + if expect == "" { + expect = "—" + } + for _, line := range strings.Split(wrapStyle.Render(expect), "\n") { + b.WriteString(dimStyle.Render(" "+line) + "\n") + } + + if job.Error != "" { + b.WriteString("\n") + if job.Status == livekit.SimulationRun_Job_STATUS_COMPLETED { + b.WriteString(greenStyle.Bold(true).Render(" Result:")) + b.WriteString("\n") + for _, line := range 
strings.Split(wrapStyle.Render(job.Error), "\n") { + b.WriteString(greenStyle.Render(" "+line) + "\n") + } + } else { + b.WriteString(redStyle.Bold(true).Render(" Error:")) + b.WriteString("\n") + for _, line := range strings.Split(wrapStyle.Render(job.Error), "\n") { + b.WriteString(redStyle.Render(" "+line) + "\n") + } + } + } + + // Show chat transcript if available + b.WriteString(m.renderChatTranscript(job.Id)) + + return b.String() +} + +func (m *simulateModel) renderSummary() string { + summary := m.run.Summary + if summary == nil { + return "" + } + + var b strings.Builder + b.WriteString("\n") + b.WriteString(dimStyle.Render(" " + strings.Repeat("─", 40))) + b.WriteString("\n\n") + b.WriteString(" " + boldStyle.Render("Summary")) + b.WriteString(fmt.Sprintf(" %s %s\n\n", + greenStyle.Render(fmt.Sprintf("%d passed", summary.Passed)), + redStyle.Render(fmt.Sprintf("%d failed", summary.Failed)), + )) + + wrapWidth := m.width - 6 + if wrapWidth < 40 { + wrapWidth = 40 + } + + if summary.GoingWell != "" { + b.WriteString(greenStyle.Bold(true).Render(" Going well:")) + b.WriteString("\n") + wrapped := lipgloss.NewStyle().Width(wrapWidth).Render(summary.GoingWell) + for _, line := range strings.Split(wrapped, "\n") { + b.WriteString(" " + line + "\n") + } + b.WriteString("\n") + } + + if summary.ToImprove != "" { + b.WriteString(yellowStyle.Bold(true).Render(" To improve:")) + b.WriteString("\n") + wrapped := lipgloss.NewStyle().Width(wrapWidth).Render(summary.ToImprove) + for _, line := range strings.Split(wrapped, "\n") { + b.WriteString(" " + line + "\n") + } + b.WriteString("\n") + } + + if len(summary.Issues) > 0 { + b.WriteString(redStyle.Bold(true).Render(" Issues:")) + b.WriteString("\n") + issueWrap := wrapWidth - 4 // account for " N. " prefix + if issueWrap < 30 { + issueWrap = 30 + } + for i, issue := range summary.Issues { + prefix := fmt.Sprintf(" %d. 
", i+1) + descWrapped := lipgloss.NewStyle().Width(issueWrap).Render(issue.Description) + for j, line := range strings.Split(descWrapped, "\n") { + if j == 0 { + b.WriteString(prefix + line + "\n") + } else { + b.WriteString(strings.Repeat(" ", len(prefix)) + line + "\n") + } + } + if issue.Suggestion != "" { + sugWrapped := lipgloss.NewStyle().Width(issueWrap).Render("Suggestion: " + issue.Suggestion) + for _, line := range strings.Split(sugWrapped, "\n") { + b.WriteString(dimStyle.Render(strings.Repeat(" ", len(prefix))+line) + "\n") + } + } + } + b.WriteString("\n") + } + + return b.String() +} + +func (m *simulateModel) renderChatTranscript(jobID string) string { + if m.run.Summary == nil || m.run.Summary.ChatHistory == nil { + return "" + } + chatCtx, ok := m.run.Summary.ChatHistory[jobID] + if !ok || chatCtx == nil || len(chatCtx.Items) == 0 { + return "" + } + + var b strings.Builder + b.WriteString("\n") + b.WriteString(boldStyle.Render(" Transcript:")) + b.WriteString("\n\n") + + for _, item := range chatCtx.Items { + switch v := item.Item.(type) { + case *agent.ChatContext_ChatItem_Message: + msg := v.Message + role := chatRoleLabel(msg.Role) + text := chatMessageText(msg) + b.WriteString(fmt.Sprintf(" %s: %s\n", role, text)) + case *agent.ChatContext_ChatItem_FunctionCall: + fc := v.FunctionCall + args := fc.Arguments + if len(args) > 80 { + args = args[:80] + "..." + } + b.WriteString(dimStyle.Render(fmt.Sprintf(" [call] %s(%s)", fc.Name, args))) + b.WriteString("\n") + case *agent.ChatContext_ChatItem_FunctionCallOutput: + fco := v.FunctionCallOutput + output := fco.Output + if len(output) > 80 { + output = output[:80] + "..." 
+ } + label := "output" + if fco.IsError { + label = "error" + } + b.WriteString(dimStyle.Render(fmt.Sprintf(" [%s] %s -> %s", label, fco.Name, output))) + b.WriteString("\n") + case *agent.ChatContext_ChatItem_AgentHandoff: + h := v.AgentHandoff + b.WriteString(dimStyle.Render(fmt.Sprintf(" [handoff] -> %s", h.NewAgentId))) + b.WriteString("\n") + case *agent.ChatContext_ChatItem_AgentConfigUpdate: + b.WriteString(dimStyle.Render(" [config update]")) + b.WriteString("\n") + } + } + return b.String() +} + +func chatRoleLabel(role agent.ChatRole) string { + switch role { + case agent.ChatRole_USER: + return cyanStyle.Render("User") + case agent.ChatRole_ASSISTANT: + return greenStyle.Render("Agent") + case agent.ChatRole_SYSTEM: + return dimStyle.Render("System") + case agent.ChatRole_DEVELOPER: + return dimStyle.Render("Developer") + default: + return dimStyle.Render("Unknown") + } +} + +func chatMessageText(msg *agent.ChatMessage) string { + if msg == nil || len(msg.Content) == 0 { + return "" + } + var parts []string + for _, c := range msg.Content { + if t := c.GetText(); t != "" { + parts = append(parts, t) + } + } + return strings.Join(parts, " ") +} + +func (m *simulateModel) renderLogs() string { + if m.agent == nil { + return "" + } + var b strings.Builder + b.WriteString(dimStyle.Render(" " + strings.Repeat("─", 40))) + b.WriteString("\n") + logBudget := m.height - 15 + if logBudget < 3 { + logBudget = 3 + } + lines := m.agent.RecentLogs(logBudget) + for _, line := range lines { + b.WriteString(dimStyle.Render(" "+line) + "\n") + } + return b.String() +} + +// firstMeaningfulLine returns the first non-empty, non-heading line from text. 
+func firstMeaningfulLine(text string) string { + for _, line := range strings.Split(text, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + return line + } + return "" +} + +func (m *simulateModel) renderHint() string { + var hint string + if m.detailJobID != "" { + hint = " ESC/q back · Ctrl+L logs" + } else { + hint = " ↑↓/Tab navigate · ENTER detail · ←→ filter · d description · Ctrl+L logs" + if m.runFinished { + hint += " · q quit" + } + } + return m.quoteAboveHint(hint) +} + +func (m *simulateModel) quoteAboveHint(hint string) string { + q := m.quote() + if !m.showLogs && lipgloss.Width(q) < m.width-4 { + return " " + q + "\n" + dimStyle.Render(hint) + } + return dimStyle.Render(hint) +} + +func jobIcon(job *livekit.SimulationRun_Job) string { + switch job.Status { + case livekit.SimulationRun_Job_STATUS_COMPLETED: + return greenStyle.Render("✓") + case livekit.SimulationRun_Job_STATUS_FAILED: + return redStyle.Render("✗") + case livekit.SimulationRun_Job_STATUS_RUNNING: + return yellowStyle.Render("●") + default: + return dimStyle.Render("○") + } +} diff --git a/cmd/lk/utils.go b/cmd/lk/utils.go index caf3571e..af5ba82e 100644 --- a/cmd/lk/utils.go +++ b/cmd/lk/utils.go @@ -18,6 +18,7 @@ import ( "context" "errors" "fmt" + "io" "maps" "os" "strings" @@ -233,6 +234,7 @@ func parseKeyValuePairs(c *cli.Command, flag string) (map[string]string, error) type loadParams struct { requireURL bool confirmProject bool + output io.Writer } type loadOption func(*loadParams) @@ -244,6 +246,12 @@ var ( confirmProject = func(p *loadParams) { p.confirmProject = true } + outputToStderr = func(p *loadParams) { + p.output = os.Stderr + } + quietOutput = func(p *loadParams) { + p.output = io.Discard + } ) // attempt to load connection config, it'll prioritize @@ -251,13 +259,14 @@ var ( // 2. config file (by default, livekit.toml) // 3. 
default project config func loadProjectDetails(c *cli.Command, opts ...loadOption) (*config.ProjectConfig, error) { - p := loadParams{requireURL: true, confirmProject: false} + p := loadParams{requireURL: true, confirmProject: false, output: os.Stdout} for _, opt := range opts { opt(&p) } + w := p.output logDetails := func(c *cli.Command, pc *config.ProjectConfig) { if c.Bool("verbose") { - fmt.Printf("URL: %s, api-key: %s, api-secret: %s\n", + fmt.Fprintf(w, "URL: %s, api-key: %s, api-secret: %s\n", pc.URL, pc.APIKey, "************", @@ -275,7 +284,7 @@ func loadProjectDetails(c *cli.Command, opts ...loadOption) (*config.ProjectConf if err != nil { return nil, err } - fmt.Fprintf(os.Stderr, "Using project [%s]\n", util.Accented(c.String("project"))) + fmt.Fprintln(w, "Using project ["+util.Accented(c.String("project"))+"]") logDetails(c, pc) return pc, nil } @@ -289,7 +298,7 @@ func loadProjectDetails(c *cli.Command, opts ...loadOption) (*config.ProjectConf if err != nil { return nil, err } - fmt.Fprintf(os.Stderr, "Using project [%s]\n", util.Accented(pc.Name)) + fmt.Fprintln(w, "Using project ["+util.Accented(pc.Name)+"]") logDetails(c, pc) return pc, nil } @@ -323,7 +332,7 @@ func loadProjectDetails(c *cli.Command, opts ...loadOption) (*config.ProjectConf envVars = append(envVars, "api-secret") } if len(envVars) > 0 { - fmt.Fprintf(os.Stderr, "Using %s from environment\n", strings.Join(envVars, ", ")) + fmt.Fprintf(w, "Using %s from environment\n", strings.Join(envVars, ", ")) logDetails(c, pc) } return pc, nil @@ -331,7 +340,7 @@ func loadProjectDetails(c *cli.Command, opts ...loadOption) (*config.ProjectConf if c.Bool("dev") { pc.APIKey = "devkey" pc.APISecret = "secret" - fmt.Fprintln(os.Stderr, "Using dev credentials") + fmt.Fprintln(w, "Using dev credentials") return pc, nil } @@ -363,13 +372,13 @@ func loadProjectDetails(c *cli.Command, opts ...loadOption) (*config.ProjectConf if _, err = selectProject(context.Background(), c); err != nil { return nil, 
err } - fmt.Fprintf(os.Stderr, "Using project [%s]\n", util.Accented(project.Name)) + fmt.Fprintf(w, "Using project [%s]\n", util.Accented(project.Name)) return project, nil } } } else { if !c.Bool("silent") && !SkipPrompts(c) { - fmt.Fprintf(os.Stderr, "Using default project [%s]\n", util.Theme.Focused.Title.Render(dp.Name)) + fmt.Fprintln(w, "Using default project ["+util.Theme.Focused.Title.Render(dp.Name)+"]") logDetails(c, dp) } } diff --git a/go.mod b/go.mod index c2eaa927..8355c147 100644 --- a/go.mod +++ b/go.mod @@ -5,15 +5,18 @@ go 1.25.0 require ( github.com/BurntSushi/toml v1.5.0 github.com/Masterminds/semver/v3 v3.4.0 + github.com/charmbracelet/bubbles v0.21.1-0.20250623103423-23b8fd6302d7 + github.com/charmbracelet/bubbletea v1.3.6 github.com/charmbracelet/huh v0.7.1-0.20250818142555-c41a69ba6443 github.com/charmbracelet/huh/spinner v0.0.0-20250818142555-c41a69ba6443 github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 github.com/frostbyte73/core v0.1.1 + github.com/fsnotify/fsnotify v1.9.0 github.com/go-logr/logr v1.4.3 github.com/go-task/task/v3 v3.44.1 github.com/joho/godotenv v1.5.1 - github.com/livekit/protocol v1.45.2-0.20260325065350-7558ba4c26d3 - github.com/livekit/server-sdk-go/v2 v2.16.1 + github.com/livekit/protocol v1.45.2-0.20260403151849-8a360e8d0221 + github.com/livekit/server-sdk-go/v2 v2.16.2-0.20260403163006-dbb96cc2c416 github.com/mattn/go-isatty v0.0.20 github.com/moby/patternmatcher v0.6.0 github.com/modelcontextprotocol/go-sdk v1.4.0 @@ -53,8 +56,6 @@ require ( github.com/catppuccin/go v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chainguard-dev/git-urls v1.0.2 // indirect - github.com/charmbracelet/bubbles v0.21.1-0.20250623103423-23b8fd6302d7 // indirect - github.com/charmbracelet/bubbletea v1.3.6 // indirect github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect github.com/charmbracelet/x/ansi v0.9.3 // indirect 
github.com/charmbracelet/x/cellbuf v0.0.13 // indirect @@ -84,13 +85,12 @@ require ( github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/fatih/color v1.18.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/gammazero/deque v1.2.1 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/go-billy/v5 v5.6.2 // indirect github.com/go-git/go-git/v5 v5.16.2 // indirect - github.com/go-jose/go-jose/v3 v3.0.4 // indirect + github.com/go-jose/go-jose/v3 v3.0.5 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-task/template v0.2.0 // indirect diff --git a/go.sum b/go.sum index ead5d6b7..1a350ea5 100644 --- a/go.sum +++ b/go.sum @@ -194,8 +194,8 @@ github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMj github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= github.com/go-git/go-git/v5 v5.16.2 h1:fT6ZIOjE5iEnkzKyxTHK1W4HGAsPhqEqiSAssSO77hM= github.com/go-git/go-git/v5 v5.16.2/go.mod h1:4Ge4alE/5gPs30F2H1esi2gPd69R0C39lolkucHBOp8= -github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY= -github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= +github.com/go-jose/go-jose/v3 v3.0.5 h1:BLLJWbC4nMZOfuPVxoZIxeYsn6Nl2r1fITaJ78UQlVQ= +github.com/go-jose/go-jose/v3 v3.0.5/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -273,12 +273,12 @@ github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731 
h1:9x+U2HGLrSw5AT github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731/go.mod h1:Rs3MhFwutWhGwmY1VQsygw28z5bWcnEYmS1OG9OxjOQ= github.com/livekit/mediatransportutil v0.0.0-20260309115634-0e2e24b36ee8 h1:coWig9fKxdb/nwOaIoGUUAogso12GblAJh/9SA9hcxk= github.com/livekit/mediatransportutil v0.0.0-20260309115634-0e2e24b36ee8/go.mod h1:RCd46PT+6sEztld6XpkCrG1xskb0u3SqxIjy4G897Ss= -github.com/livekit/protocol v1.45.2-0.20260325065350-7558ba4c26d3 h1:wmg/PTPHbIXpKoQvoLcqdJS0K8KpKGf34Xe+YPOPTm8= -github.com/livekit/protocol v1.45.2-0.20260325065350-7558ba4c26d3/go.mod h1:63AUi0vQak6Y6gPqSBHLc+ExYTUwEqF/m4b2IRW1iO0= +github.com/livekit/protocol v1.45.2-0.20260403151849-8a360e8d0221 h1:loe7h+z1kOu/ojprFTYSZBbJVly7gdZgQ/ewElGeLPo= +github.com/livekit/protocol v1.45.2-0.20260403151849-8a360e8d0221/go.mod h1:e6QdWDkfot+M2nRh0eitJUS0ZLuwvKCsfiz2pWWSG3s= github.com/livekit/psrpc v0.7.1 h1:ms37az0QTD3UXIWuUC5D/SkmKOlRMVRsI261eBWu/Vw= github.com/livekit/psrpc v0.7.1/go.mod h1:bZ4iHFQptTkbPnB0LasvRNu/OBYXEu1NA6O5BMFo9kk= -github.com/livekit/server-sdk-go/v2 v2.16.1 h1:ZkIA9OdVvQ6Up1uW/RtQ0YJUgYMJ6+ywOmDg0jX7bTg= -github.com/livekit/server-sdk-go/v2 v2.16.1/go.mod h1:oQbYijcbPzfjBAOzoq7tz9Ktqur8JNRCd923VP8xOQQ= +github.com/livekit/server-sdk-go/v2 v2.16.2-0.20260403163006-dbb96cc2c416 h1:QrNZ7Klt9wb/w/wS7o+Sgb3qWEomFRiUxeKTfMZss7w= +github.com/livekit/server-sdk-go/v2 v2.16.2-0.20260403163006-dbb96cc2c416/go.mod h1:VNVkPtV8HO3MOe5X13ODK20Mvxd5VQTGgKNDSA+KE6Q= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/magefile/mage v1.16.1 h1:j5UwkdA48xTlGs0Hcm1Q3sSAcxBorntQjiewDNMsqlo= diff --git a/pkg/agentfs/detect.go b/pkg/agentfs/detect.go index 55a0ceae..4d8d1c05 100644 --- a/pkg/agentfs/detect.go +++ b/pkg/agentfs/detect.go @@ -16,7 +16,10 @@ package agentfs import ( "errors" + "fmt" "io/fs" + "os" + "path/filepath" 
"github.com/livekit/livekit-cli/v2/pkg/util" "github.com/pelletier/go-toml" @@ -115,3 +118,26 @@ func DetectProjectType(dir fs.FS) (ProjectType, error) { return ProjectTypeUnknown, errors.New("expected package.json, requirements.txt, pyproject.toml, or lock files") } + +// DetectProjectRoot walks up from dir to find a directory containing project +// files (pyproject.toml, requirements.txt, package.json, etc). Returns the +// absolute path to the project root and the detected project type. +func DetectProjectRoot(dir string) (string, ProjectType, error) { + absDir, err := filepath.Abs(dir) + if err != nil { + return "", ProjectTypeUnknown, err + } + + for { + pt, err := DetectProjectType(os.DirFS(absDir)) + if err == nil { + return absDir, pt, nil + } + + parent := filepath.Dir(absDir) + if parent == absDir { + return "", ProjectTypeUnknown, fmt.Errorf("could not detect project type in %s or any parent directory", dir) + } + absDir = parent + } +} diff --git a/pkg/apm/bridge.go b/pkg/apm/bridge.go index c21f33f3..ea15eb0d 100644 --- a/pkg/apm/bridge.go +++ b/pkg/apm/bridge.go @@ -7,9 +7,7 @@ package apm // #cgo linux CXXFLAGS: -DWEBRTC_LINUX -DWEBRTC_POSIX // #cgo windows CXXFLAGS: -DWEBRTC_WIN // #cgo arm64 CXXFLAGS: -DWEBRTC_HAS_NEON -DWEBRTC_ARCH_ARM64 -// #cgo darwin LDFLAGS: -lc++ -// #cgo linux LDFLAGS: -lc++ -lm -lpthread -// #cgo windows LDFLAGS: -lc++ +// #cgo linux LDFLAGS: -lm -lpthread // #include "bridge.h" import "C" diff --git a/pkg/console/fft.go b/pkg/console/fft.go new file mode 100644 index 00000000..b1ac4983 --- /dev/null +++ b/pkg/console/fft.go @@ -0,0 +1,64 @@ +//go:build console + +package console + +import ( + "math" + "math/cmplx" +) + +// fft computes an in-place radix-2 Cooley-Tukey FFT. 
+func fft(a []complex128) { + n := len(a) + if n <= 1 { + return + } + + // Bit-reversal permutation + for i, j := 1, 0; i < n; i++ { + bit := n >> 1 + for ; j&bit != 0; bit >>= 1 { + j ^= bit + } + j ^= bit + if i < j { + a[i], a[j] = a[j], a[i] + } + } + + // Butterfly stages + for length := 2; length <= n; length <<= 1 { + angle := -2 * math.Pi / float64(length) + wn := cmplx.Exp(complex(0, angle)) + for i := 0; i < n; i += length { + w := complex(1, 0) + for j := 0; j < length/2; j++ { + u := a[i+j] + v := w * a[i+j+length/2] + a[i+j] = u + v + a[i+j+length/2] = u - v + w *= wn + } + } + } +} + +// rfft computes the real FFT of x, returning n/2+1 complex bins +// where n is the next power of 2 >= len(x). +func rfft(x []float64) ([]complex128, int) { + n := nextPow2(len(x)) + buf := make([]complex128, n) + for i, v := range x { + buf[i] = complex(v, 0) + } + fft(buf) + return buf[:n/2+1], n +} + +func nextPow2(n int) int { + p := 1 + for p < n { + p <<= 1 + } + return p +} diff --git a/pkg/console/pipeline.go b/pkg/console/pipeline.go new file mode 100644 index 00000000..8b6b399f --- /dev/null +++ b/pkg/console/pipeline.go @@ -0,0 +1,612 @@ +//go:build console + +// Package console implements the audio pipeline for the lk console command. +// It connects microphone input and speaker output via PortAudio, applies +// WebRTC audio processing (echo cancellation, noise suppression), and +// communicates with an agent over TCP using protobuf-framed SessionMessages. +// +// Architecture (3 goroutines, matching the Python console's PortAudio model): +// +// micLoop — reads PortAudio input into the capture ring buffer. +// speakerLoop — reads both rings, runs ProcessRender + ProcessCapture in +// lockstep, writes to speakers, sends capture to agent. +// Paced by outputStream.Write at the hardware output rate. +// tcpReader — reads TCP messages: audio → playback ring, events → TUI. 
+// +// All APM calls happen in speakerLoop, so they are single-threaded and +// guaranteed 1:1. +package console + +import ( + "context" + "encoding/binary" + "fmt" + "math" + "net" + "sync" + "time" + + agent "github.com/livekit/protocol/livekit/agent" + + "github.com/livekit/livekit-cli/v2/pkg/apm" + "github.com/livekit/livekit-cli/v2/pkg/portaudio" +) + +const ( + SampleRate = 48000 + Channels = 1 + FrameDurationMs = 30 + SamplesPerFrame = SampleRate * FrameDurationMs / 1000 // 1440 + APMFrameSamples = SampleRate / 100 // 480 (10ms) + NumFFTBands = 14 + + CaptureRingFrames = 50 // ~1.5s — small, just absorbs jitter between mic and speaker loops + PlaybackRingFrames = 4000 // ~120s — large, TTS pushes faster than real-time +) + +type AudioPipeline struct { + inputStream *portaudio.Stream + outputStream *portaudio.Stream + apmInst *apm.APM + noAEC bool + conn net.Conn + connMu sync.Mutex // protects writes to conn + + captureRing *RingBuffer + playbackRing *RingBuffer + + // Events channel receives AgentSessionEvents from the agent for the TUI. + Events chan *agent.AgentSessionEvent + + // Responses channel receives SessionResponses (request completions) for the TUI. + Responses chan *agent.SessionResponse + + // ready is closed when the agent session is established (first TCP message). + ready chan struct{} + readyOnce sync.Once + + // flushCancel cancels the current waitForDrainAndAck goroutine. + // Only accessed from the tcpReader goroutine. 
+ flushCancel context.CancelFunc + + mu sync.Mutex + fftBands [NumFFTBands]float64 + muted bool + level float64 // capture level in dB + playing bool // true when outputting real audio (not silence) + + cancel context.CancelFunc + audioCtx context.Context // stored so EnableAudio can start goroutines + wg sync.WaitGroup +} + +type PipelineConfig struct { + InputDevice *portaudio.DeviceInfo // nil to skip audio (text-only) + OutputDevice *portaudio.DeviceInfo // nil to skip audio (text-only) + NoAEC bool + Conn net.Conn +} + +func NewPipeline(cfg PipelineConfig) (*AudioPipeline, error) { + ap := &AudioPipeline{ + conn: cfg.Conn, + noAEC: cfg.NoAEC, + Events: make(chan *agent.AgentSessionEvent, 64), + Responses: make(chan *agent.SessionResponse, 16), + ready: make(chan struct{}), + } + + if cfg.InputDevice != nil && cfg.OutputDevice != nil { + if err := ap.initAudio(cfg.InputDevice, cfg.OutputDevice, cfg.NoAEC); err != nil { + return nil, err + } + } + + return ap, nil +} + +func (p *AudioPipeline) initAudio(inputDev, outputDev *portaudio.DeviceInfo, noAEC bool) error { + inputStream, err := portaudio.OpenInputStream(inputDev, SampleRate, Channels, SamplesPerFrame) + if err != nil { + return err + } + + outputStream, err := portaudio.OpenOutputStream(outputDev, SampleRate, Channels, SamplesPerFrame) + if err != nil { + inputStream.Close() + return err + } + + var apmInst *apm.APM + if !noAEC { + apmCfg := apm.DefaultConfig() + apmCfg.CaptureChannels = Channels + apmCfg.RenderChannels = Channels + apmInst, err = apm.NewAPM(apmCfg) + if err != nil { + apmInst = nil // run without AEC + } + } + + if apmInst != nil { + inInfo := inputStream.Info() + outInfo := outputStream.Info() + delayMs := int((inInfo.InputLatency + outInfo.OutputLatency).Milliseconds()) + apmInst.SetStreamDelayMs(delayMs) + } + + p.inputStream = inputStream + p.outputStream = outputStream + p.apmInst = apmInst + p.captureRing = NewRingBuffer(SamplesPerFrame * CaptureRingFrames) + p.playbackRing = 
NewRingBuffer(SamplesPerFrame * PlaybackRingFrames) + return nil +} + +// EnableAudio lazily initializes audio devices. Returns an error if +// PortAudio is not available or devices cannot be opened. +func (p *AudioPipeline) EnableAudio() error { + if p.HasAudio() { + return nil + } + + if err := portaudio.Initialize(); err != nil { + return fmt.Errorf("failed to initialize PortAudio: %w", err) + } + + inputDev, err := portaudio.DefaultInputDevice() + if err != nil { + portaudio.Terminate() + return fmt.Errorf("input device: %w", err) + } + outputDev, err := portaudio.DefaultOutputDevice() + if err != nil { + portaudio.Terminate() + return fmt.Errorf("output device: %w", err) + } + + if err := p.initAudio(inputDev, outputDev, p.noAEC); err != nil { + portaudio.Terminate() + return err + } + + // Start the audio loops + if err := p.outputStream.Start(); err != nil { + return err + } + if err := p.inputStream.Start(); err != nil { + p.outputStream.Stop() + return err + } + + p.wg.Add(2) + ctx := p.audioCtx + go p.micLoop(ctx) + go p.speakerLoop(ctx) + + return nil +} + +// HasAudio reports whether the audio pipeline is active. +func (p *AudioPipeline) HasAudio() bool { + return p.inputStream != nil +} + +func (p *AudioPipeline) Start(ctx context.Context) error { + ctx, p.cancel = context.WithCancel(ctx) + p.audioCtx = ctx + + // Always run the TCP reader for events/responses. + p.wg.Add(1) + go p.tcpReader(ctx) + + // Start audio loops if devices are available. 
+ if p.HasAudio() { + if err := p.outputStream.Start(); err != nil { + return err + } + if err := p.inputStream.Start(); err != nil { + p.outputStream.Stop() + return err + } + p.wg.Add(2) + go p.micLoop(ctx) + go p.speakerLoop(ctx) + } + + <-ctx.Done() + return nil +} + +func (p *AudioPipeline) Stop() { + if p.cancel != nil { + p.cancel() + } + + if p.HasAudio() { + p.inputStream.Abort() + p.outputStream.Abort() + } + p.conn.Close() + if p.captureRing != nil { + p.captureRing.cond.Broadcast() + } + + p.wg.Wait() + + if p.HasAudio() { + p.inputStream.Close() + p.outputStream.Close() + } + if p.apmInst != nil { + p.apmInst.Close() + } +} + +func (p *AudioPipeline) writeMessage(msg *agent.AgentSessionMessage) error { + p.connMu.Lock() + defer p.connMu.Unlock() + return WriteSessionMessage(p.conn, msg) +} + +func (p *AudioPipeline) SendRequest(req *agent.SessionRequest) error { + return p.writeMessage(&agent.AgentSessionMessage{ + Message: &agent.AgentSessionMessage_Request{Request: req}, + }) +} + +func (p *AudioPipeline) SetMuted(muted bool) { + p.mu.Lock() + p.muted = muted + p.mu.Unlock() +} + +func (p *AudioPipeline) Muted() bool { + p.mu.Lock() + defer p.mu.Unlock() + return p.muted +} + +func (p *AudioPipeline) Level() float64 { + p.mu.Lock() + defer p.mu.Unlock() + return p.level +} + +func (p *AudioPipeline) FFTBands() [NumFFTBands]float64 { + p.mu.Lock() + defer p.mu.Unlock() + return p.fftBands +} + +func (p *AudioPipeline) IsPlaying() bool { + p.mu.Lock() + defer p.mu.Unlock() + return p.playing +} + +func (p *AudioPipeline) AECStats() *apm.Stats { + if p.apmInst == nil { + return nil + } + s := p.apmInst.GetStats() + return &s +} + +// micLoop reads mic input at hardware rate and writes to the capture ring. +// Muting is applied here so speakerLoop always sees clean data. 
+func (p *AudioPipeline) micLoop(ctx context.Context) { + defer p.wg.Done() + buf := make([]int16, SamplesPerFrame*Channels) + + for { + if ctx.Err() != nil { + return + } + if err := p.inputStream.Read(buf); err != nil { + if ctx.Err() != nil { + return + } + continue + } + + p.mu.Lock() + muted := p.muted + p.mu.Unlock() + + if muted { + for i := range buf { + buf[i] = 0 + } + } + + p.captureRing.Write(buf) + } +} + +// speakerLoop runs all APM processing and output. Paced by outputStream.Write +// at the hardware output rate (~30ms). Each iteration: +// 1. Reads capture from captureRing (non-blocking, silence if empty) +// 2. Reads playback from playbackRing (non-blocking, silence if empty) +// 3. ProcessRender then ProcessCapture (single-threaded, 1:1) +// 4. Writes playback to speakers +// 5. Sends processed capture to agent +func (p *AudioPipeline) speakerLoop(ctx context.Context) { + defer p.wg.Done() + captureBuf := make([]int16, SamplesPerFrame*Channels) + playbackBuf := make([]int16, SamplesPerFrame*Channels) + apmBuf := make([]int16, APMFrameSamples*Channels) + ready := false + + for { + if ctx.Err() != nil { + return + } + + // Read capture (non-blocking); pad remainder with silence. + cn := p.captureRing.ReadAvailable(captureBuf) + for i := cn; i < len(captureBuf); i++ { + captureBuf[i] = 0 + } + + // Read playback (non-blocking); pad remainder with silence. + pn := p.playbackRing.ReadAvailable(playbackBuf) + for i := pn; i < len(playbackBuf); i++ { + playbackBuf[i] = 0 + } + + p.mu.Lock() + p.playing = pn > 0 + p.mu.Unlock() + + // ProcessRender then ProcessCapture — both in this goroutine, + // right next to each other, no mutex needed. 
+ if p.apmInst != nil { + for i := 0; i < SamplesPerFrame; i += APMFrameSamples { + copy(apmBuf, playbackBuf[i:i+APMFrameSamples]) + _ = p.apmInst.ProcessRender(apmBuf) + + copy(apmBuf, captureBuf[i:i+APMFrameSamples]) + _ = p.apmInst.ProcessCapture(apmBuf) + copy(captureBuf[i:], apmBuf) + } + } + + // Write playback to speakers — blocks at hardware rate. + if err := p.outputStream.Write(playbackBuf); err != nil { + if ctx.Err() != nil { + return + } + } + + // Send processed capture to agent (only after session is ready). + if !ready { + select { + case <-p.ready: + ready = true + default: + continue + } + } + + p.computeMetrics(captureBuf) + + _ = p.writeMessage(&agent.AgentSessionMessage{ + Message: &agent.AgentSessionMessage_AudioInput{ + AudioInput: &agent.AgentSessionMessage_ConsoleIO_AudioFrame{ + Data: SamplesToBytes(captureBuf), + SampleRate: SampleRate, + NumChannels: Channels, + SamplesPerChannel: uint32(SamplesPerFrame), + }, + }, + }) + } +} + +// tcpReader reads messages from the agent over TCP and dispatches them. +func (p *AudioPipeline) tcpReader(ctx context.Context) { + defer p.wg.Done() + + for { + msg, err := ReadSessionMessage(p.conn) + if err != nil { + return + } + + p.readyOnce.Do(func() { close(p.ready) }) + + switch m := msg.Message.(type) { + case *agent.AgentSessionMessage_AudioOutput: + p.playbackRing.Write(BytesToSamples(m.AudioOutput.Data)) + + case *agent.AgentSessionMessage_Event: + select { + case p.Events <- m.Event: + default: + } + + case *agent.AgentSessionMessage_AudioPlaybackClear: + if p.flushCancel != nil { + p.flushCancel() + p.flushCancel = nil + } + p.playbackRing.Reset() + + case *agent.AgentSessionMessage_AudioPlaybackFlush: + if p.flushCancel != nil { + p.flushCancel() + } + flushCtx, cancel := context.WithCancel(ctx) + p.flushCancel = cancel + go p.waitForDrainAndAck(flushCtx) + + case *agent.AgentSessionMessage_Response: + // Forward response so the TUI knows the request completed. 
+ // Don't synthesize ConversationItemAdded — those arrive via the + // event stream already. + if m.Response != nil { + select { + case p.Responses <- m.Response: + default: + } + } + } + } +} + +func (p *AudioPipeline) sendPlaybackFinished() { + _ = p.writeMessage(&agent.AgentSessionMessage{ + Message: &agent.AgentSessionMessage_AudioPlaybackFinished{ + AudioPlaybackFinished: &agent.AgentSessionMessage_ConsoleIO_AudioPlaybackFinished{}, + }, + }) +} + +func (p *AudioPipeline) waitForDrainAndAck(ctx context.Context) { + for p.playbackRing.Available() > 0 { + select { + case <-ctx.Done(): + return + default: + } + time.Sleep(5 * time.Millisecond) + } + select { + case <-ctx.Done(): + return + default: + } + p.sendPlaybackFinished() +} + +func (p *AudioPipeline) computeMetrics(samples []int16) { + n := len(samples) + sr := float64(SampleRate) + + // Convert to float64, normalize, apply Hanning window + x := make([]float64, n) + for i, s := range samples { + v := float64(s) / 32768.0 + w := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(n))) + x[i] = v * w + } + + // Real FFT + X, nfft := rfft(x) + + // Magnitude spectrum, scaled by 2/n + mag := make([]float64, len(X)) + scale := 2.0 / float64(n) + for i, c := range X { + r, im := real(c), imag(c) + mag[i] = math.Sqrt(r*r+im*im) * scale + } + mag[0] *= 0.5 + if n%2 == 0 { + mag[len(mag)-1] *= 0.5 + } + + // Geometric frequency band edges: 20 Hz → Nyquist*0.96 + nb := NumFFTBands + nyquist := sr * 0.5 * 0.96 + logLow := math.Log(20.0) + logHigh := math.Log(nyquist) + edges := make([]float64, nb+1) + for i := 0; i <= nb; i++ { + edges[i] = math.Exp(logLow + float64(i)*(logHigh-logLow)/float64(nb)) + } + + // Bin power into frequency bands + binFreq := sr / float64(nfft) + sump := make([]float64, nb) + cnts := make([]float64, nb) + for i, m := range mag { + freq := float64(i) * binFreq + // Find band via edges (equivalent to np.digitize - 1, clipped) + band := nb - 1 + for b := 1; b <= nb; b++ { + if freq < edges[b] { 
+ band = b - 1 + break + } + } + if band < 0 { + band = 0 + } + sump[band] += m * m + cnts[band]++ + } + + // Mean power → dB → normalize to [0,1] + const floorDB, hotDB = -70.0, -20.0 + var bands [NumFFTBands]float64 + for b := 0; b < nb; b++ { + c := cnts[b] + if c == 0 { + c = 1 + } + pmean := sump[b] / c + db := 10.0 * math.Log10(pmean + 1e-12) + lev := (db - floorDB) / (hotDB - floorDB) + lev = math.Max(0, math.Min(1, lev)) + // Power-law compression + lev = math.Max(math.Pow(lev, 0.75)-0.02, 0) + bands[b] = lev + } + + // Peak normalization (cap scale at 3x to avoid blowing up silence) + peak := 0.0 + for _, v := range bands { + if v > peak { + peak = v + } + } + normScale := math.Min(0.95/(peak+1e-6), 3.0) + for b := range bands { + bands[b] = math.Min(bands[b]*normScale, 1.0) + } + + // Exponential decay smoothing (~100ms time constant) + decay := math.Exp(-float64(n) / sr / 0.1) + + // RMS level in dB + var sum float64 + for _, s := range samples { + v := float64(s) / 32768.0 + sum += v * v + } + rms := math.Sqrt(sum / float64(n)) + db := 20 * math.Log10(rms+1e-10) + + p.mu.Lock() + for b := 0; b < nb; b++ { + if bands[b] > p.fftBands[b]*decay { + p.fftBands[b] = bands[b] + } else { + p.fftBands[b] *= decay + } + } + p.level = db + p.mu.Unlock() +} + +func SamplesToBytes(samples []int16) []byte { + buf := make([]byte, len(samples)*2) + for i, s := range samples { + binary.LittleEndian.PutUint16(buf[i*2:], uint16(s)) + } + return buf +} + +func BytesToSamples(data []byte) []int16 { + n := len(data) / 2 // truncate odd trailing byte + if n == 0 { + return nil + } + samples := make([]int16, n) + for i := range samples { + samples[i] = int16(binary.LittleEndian.Uint16(data[i*2:])) + } + return samples +} diff --git a/pkg/console/ringbuffer.go b/pkg/console/ringbuffer.go new file mode 100644 index 00000000..29648bb4 --- /dev/null +++ b/pkg/console/ringbuffer.go @@ -0,0 +1,130 @@ +//go:build console + +package console + +import ( + "sync" + "sync/atomic" +) + 
// RingBuffer is a SPSC ring buffer for int16 audio samples.
// When the writer outruns the reader, the reader skips ahead to avoid stale data.
//
// r and w are monotonically increasing sample counters; the slot for a
// counter value is counter % size.
type RingBuffer struct {
	buf  []int16
	size int
	r    atomic.Int64
	w    atomic.Int64
	mu   sync.Mutex // only for condition variable
	cond *sync.Cond
}

// NewRingBuffer returns an empty ring buffer holding up to size samples.
func NewRingBuffer(size int) *RingBuffer {
	rb := &RingBuffer{
		buf:  make([]int16, size),
		size: size,
	}
	rb.cond = sync.NewCond(&rb.mu)
	return rb
}

// Write appends samples without blocking and returns how many were stored.
// When len(samples) exceeds the capacity only the newest size samples are
// kept. Unread data is overwritten if the reader has fallen behind.
func (rb *RingBuffer) Write(samples []int16) int {
	n := len(samples)
	if n > rb.size {
		samples = samples[n-rb.size:]
		n = rb.size
	}
	if n > 0 {
		// Copy in at most two contiguous runs: up to the end of the
		// backing array, then the wrapped remainder from the start.
		start := int(rb.w.Load() % int64(rb.size))
		first := copy(rb.buf[start:], samples)
		copy(rb.buf, samples[first:])
	}
	rb.w.Add(int64(n))

	// Waiters test w-r while holding mu and then call cond.Wait. Signalling
	// under the same mutex closes the window in which a write landing
	// between that test and the Wait would lose its wakeup.
	rb.mu.Lock()
	rb.cond.Signal()
	rb.mu.Unlock()
	return n
}

// ReadAvailable copies up to len(out) available samples into out (non-blocking).
// Returns the number of samples actually copied.
func (rb *RingBuffer) ReadAvailable(out []int16) int {
	avail := int(rb.w.Load() - rb.r.Load())
	if avail <= 0 {
		return 0
	}
	// If writer has lapped us, skip ahead
	if avail > rb.size {
		rb.r.Add(int64(avail - rb.size))
		avail = rb.size
	}
	n := len(out)
	if n > avail {
		n = avail
	}
	if n == 0 {
		return 0
	}
	// Two contiguous runs, mirroring Write.
	start := int(rb.r.Load() % int64(rb.size))
	first := copy(out[:n], rb.buf[start:])
	copy(out[first:n], rb.buf)
	rb.r.Add(int64(n))
	return n
}

// Read blocks until len(out) samples are available, then copies them.
+func (rb *RingBuffer) Read(out []int16) bool { + needed := len(out) + copied := 0 + for copied < needed { + avail := int(rb.w.Load() - rb.r.Load()) + if avail <= 0 { + rb.mu.Lock() + for rb.w.Load()-rb.r.Load() <= 0 { + rb.cond.Wait() + } + rb.mu.Unlock() + continue + } + if avail > rb.size { + skip := int64(avail - rb.size) + rb.r.Add(skip) + avail = rb.size + } + toCopy := needed - copied + if toCopy > avail { + toCopy = avail + } + r := int(rb.r.Load()) + for i := 0; i < toCopy; i++ { + out[copied+i] = rb.buf[(r+i)%rb.size] + } + rb.r.Add(int64(toCopy)) + copied += toCopy + } + return true +} + +func (rb *RingBuffer) Available() int { + return int(rb.w.Load() - rb.r.Load()) +} + +// WaitForData blocks until samples are available in the buffer. +// Returns true if data is available, false if woken up with no data +// (e.g., after Reset or Broadcast for shutdown). +func (rb *RingBuffer) WaitForData() bool { + if rb.w.Load()-rb.r.Load() > 0 { + return true + } + rb.mu.Lock() + for rb.w.Load()-rb.r.Load() <= 0 { + rb.cond.Wait() + // After wakeup, re-check. If still empty (Reset/shutdown), return false. 
+ if rb.w.Load()-rb.r.Load() <= 0 { + rb.mu.Unlock() + return false + } + } + rb.mu.Unlock() + return true +} + +func (rb *RingBuffer) Reset() { + rb.r.Store(rb.w.Load()) + rb.cond.Broadcast() +} diff --git a/pkg/console/tcp.go b/pkg/console/tcp.go new file mode 100644 index 00000000..6b3efbad --- /dev/null +++ b/pkg/console/tcp.go @@ -0,0 +1,90 @@ +//go:build console + +package console + +import ( + "errors" + "io" + "net" + "sync" + + "github.com/livekit/livekit-cli/v2/pkg/ipc" + + agent "github.com/livekit/protocol/livekit/agent" +) + +type TCPServer struct { + listener *ipc.Listener + conn net.Conn + mu sync.Mutex + closed bool +} + +func NewTCPServer(addr string) (*TCPServer, error) { + ln, err := ipc.Listen(addr) + if err != nil { + return nil, err + } + return &TCPServer{listener: ln}, nil +} + +func (s *TCPServer) Addr() net.Addr { + return s.listener.Addr() +} + +// Accept waits for a single agent connection; subsequent connections are rejected. +func (s *TCPServer) Accept() (net.Conn, error) { + conn, err := s.listener.Accept() + if err != nil { + return nil, err + } + + s.mu.Lock() + if s.conn != nil { + s.mu.Unlock() + conn.Close() + return nil, errors.New("console tcp: already connected") + } + s.conn = conn + s.mu.Unlock() + + // Close listener to reject further connections + s.listener.Close() + + return conn, nil +} + +// Conn returns the accepted connection, or nil if none. +func (s *TCPServer) Conn() net.Conn { + s.mu.Lock() + defer s.mu.Unlock() + return s.conn +} + +func (s *TCPServer) Close() error { + s.mu.Lock() + defer s.mu.Unlock() + + s.closed = true + var errs []error + if s.conn != nil { + errs = append(errs, s.conn.Close()) + s.conn = nil + } + errs = append(errs, s.listener.Close()) + return errors.Join(errs...) +} + +// WriteSessionMessage sends a protobuf-framed AgentSessionMessage. 
+func WriteSessionMessage(w io.Writer, msg *agent.AgentSessionMessage) error { + return ipc.WriteProto(w, msg) +} + +// ReadSessionMessage reads a protobuf-framed AgentSessionMessage. +func ReadSessionMessage(r io.Reader) (*agent.AgentSessionMessage, error) { + msg := &agent.AgentSessionMessage{} + if err := ipc.ReadProto(r, msg); err != nil { + return nil, err + } + return msg, nil +} diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go new file mode 100644 index 00000000..e83ac2c9 --- /dev/null +++ b/pkg/ipc/ipc.go @@ -0,0 +1,99 @@ +package ipc + +import ( + "encoding/binary" + "fmt" + "io" + "net" + "sync" + + "google.golang.org/protobuf/proto" +) + +const maxMessageSize = 1 << 20 // 1MB + +// WriteProto sends a protobuf message with a 4-byte big-endian length prefix. +func WriteProto(w io.Writer, msg proto.Message) error { + data, err := proto.Marshal(msg) + if err != nil { + return fmt.Errorf("ipc: marshal: %w", err) + } + if len(data) > maxMessageSize { + return fmt.Errorf("ipc: message too large: %d bytes", len(data)) + } + + buf := make([]byte, 4+len(data)) + binary.BigEndian.PutUint32(buf[:4], uint32(len(data))) + copy(buf[4:], data) + _, err = w.Write(buf) + return err +} + +// ReadProto reads a length-prefixed protobuf message into msg. +func ReadProto(r io.Reader, msg proto.Message) error { + var header [4]byte + if _, err := io.ReadFull(r, header[:]); err != nil { + return err + } + + length := binary.BigEndian.Uint32(header[:]) + if length > maxMessageSize { + return fmt.Errorf("ipc: message too large: %d bytes", length) + } + + data := make([]byte, length) + if length > 0 { + if _, err := io.ReadFull(r, data); err != nil { + return fmt.Errorf("ipc: partial message: %w", err) + } + } + + if err := proto.Unmarshal(data, msg); err != nil { + return fmt.Errorf("ipc: unmarshal: %w", err) + } + return nil +} + +// Listener wraps a net.Listener for protobuf IPC. 
+type Listener struct { + listener net.Listener + mu sync.Mutex + closed bool +} + +// Listen creates a new IPC listener on the given address. +func Listen(addr string) (*Listener, error) { + ln, err := net.Listen("tcp", addr) + if err != nil { + return nil, fmt.Errorf("ipc: listen on %s: %w", addr, err) + } + return &Listener{listener: ln}, nil +} + +// Accept waits for a new connection. +func (l *Listener) Accept() (net.Conn, error) { + conn, err := l.listener.Accept() + if err != nil { + return nil, err + } + if tc, ok := conn.(*net.TCPConn); ok { + tc.SetNoDelay(true) + } + return conn, nil +} + +// Addr returns the listener's address. +func (l *Listener) Addr() net.Addr { + return l.listener.Addr() +} + +// Close closes the listener. +func (l *Listener) Close() error { + l.mu.Lock() + defer l.mu.Unlock() + if l.closed { + return nil + } + l.closed = true + return l.listener.Close() +} diff --git a/pkg/portaudio/portaudio.go b/pkg/portaudio/portaudio.go index e1ad24ce..5670e538 100644 --- a/pkg/portaudio/portaudio.go +++ b/pkg/portaudio/portaudio.go @@ -6,6 +6,10 @@ package portaudio /* #cgo CFLAGS: -I${SRCDIR}/pa_src/include -I${SRCDIR}/pa_src/src/common -DPA_LITTLE_ENDIAN -Wno-unused-parameter -Wno-deprecated-declarations +#if !__has_include("pa_src/include/portaudio.h") +#error "PortAudio submodule not found. Run: git submodule update --init --recursive" +#else + #include "pa_src/src/common/pa_allocation.c" #include "pa_src/src/common/pa_converters.c" #include "pa_src/src/common/pa_cpuload.c" @@ -18,6 +22,7 @@ package portaudio #include "pa_src/src/common/pa_trace.c" #include "portaudio.h" +#endif */ import "C"