localstack · HarshCasper · Mar 13, 2026 · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -18,10 +18,28 @@ jobs:
       - name: Use Node.js 20
         uses: actions/setup-node@v2
         with:
-          node-version: 20.x
+          node-version: 22.x
 
       - name: Install dependencies
         run: yarn
 
       - name: Build the project
         run: yarn build
+
+  mcp-direct-tests:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Use Node.js 22
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22.x
+
+      - name: Install dependencies
+        run: yarn
+
+      - name: Run MCP Server Tester
+        run: yarn test:mcp:direct
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,9 @@ dist
 .xmcp
 xmcp-env.d.ts
 .mcpregistry*
+playwright-report/
+test-results/
+terraform.tfstate*
+.terraform/
+.terraform.lock.hcl
+.mcp-test-results/
diff --git a/README.md b/README.md
@@ -44,7 +44,7 @@ For other MCP Clients, refer to the [configuration guide](#configuration).
 - [LocalStack CLI](https://docs.localstack.cloud/getting-started/installation/#localstack-cli) and Docker installed in your system path
 - [`cdklocal`](https://github.com/localstack/aws-cdk-local) or [`tflocal`](https://github.com/localstack/terraform-local) installed in your system path for running infrastructure deployment tooling
 - A [valid LocalStack Auth Token](https://docs.localstack.cloud/aws/getting-started/auth-token/) to enable Pro services, IAM Policy Analyzer, Cloud Pods, Chaos Injector, and Extensions tools (**optional**)
-- [Node.js v22.x](https://nodejs.org/en/download/) installed in your system path
+- [Node.js v22.x](https://nodejs.org/en/download/) or higher installed in your system path
 
 ### Configuration
 
@@ -104,19 +104,50 @@ Here's how to add your LocalStack Auth Token to the environment variables:
 
 ## Contributing
 
+This project is built on the [XMCP](https://github.com/basementstudio/xmcp) framework. You can add new tools by adding a new file to the `src/tools` directory and documenting it in the `manifest.json` file.
+
 Pull requests are welcomed on GitHub! To get started:
 
 - Install Git and Node.js
 - Clone the repository
 - Install dependencies with `yarn`
 - Build with `yarn build`
 
-Built on the [XMCP](https://github.com/basementstudio/xmcp) framework, you can add new tools by adding a new file to the `src/tools` directory and documenting it in the `manifest.json` file.
+### MCP Server Tester
+
+This repository includes [MCP Server Tester](https://github.com/gleanwork/mcp-server-tester) for tool validation in direct mode and LLM host mode.
+
+- Run direct MCP tests (deterministic):
+  ```bash
+  yarn test:mcp:direct
+  ```
+- Run Gemini-based MCP host evals:
+  ```bash
+  export GOOGLE_GENERATIVE_AI_API_KEY="<your-gemini-key>"
+  export LOCALSTACK_AUTH_TOKEN="<your-localstack-auth-token>"
+  yarn test:mcp:evals
+  ```
+- Open the latest MCP Server Tester HTML report:
+  ```bash
+  npx mcp-server-tester open
+  ```
+- Run both:
+  ```bash
+  yarn test:mcp
+  ```
+
+Notes:
+
+- MCP tests target the local STDIO server command `node dist/stdio.js` by default.
+- `LOCALSTACK_AUTH_TOKEN` is required for the comprehensive Gemini eval suite.
+- You can override the target command with:
+  - `MCP_TEST_COMMAND`
+  - `MCP_TEST_ARGS` (space-separated arguments)
 
 ## License
 
 [Apache License 2.0](./LICENSE)
 
 <a href="https://glama.ai/mcp/servers/@localstack/localstack-mcp-server">
   <img width="380" height="200" src="https://glama.ai/mcp/servers/@localstack/localstack-mcp-server/badge" alt="LocalStack Server MCP server" />
-</a>
+</a>
diff --git a/data/evals/gemini-comprehensive.json b/data/evals/gemini-comprehensive.json
@@ -0,0 +1,110 @@
+{
+  "name": "localstack-mcp-gemini-comprehensive",
+  "description": "Scenario-based Gemini evals for LocalStack MCP tools.",
+  "cases": [
+    {
+      "id": "scenario-start-localstack",
+      "mode": "mcp_host",
+      "scenario": "Start LocalStack for AWS and confirm the runtime is running.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": {
+          "calls": [
+            { "name": "localstack-management", "required": true }
+          ],
+          "order": "any"
+        }
+      }
+    },
+    {
+      "id": "scenario-deploy-terraform-project",
+      "mode": "mcp_host",
+      "scenario": "Deploy the Terraform project in data/sample-terraform to LocalStack.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": {
+          "calls": [{ "name": "localstack-deployer", "required": true }],
+          "order": "any"
+        }
+      }
+    },
+    {
+      "id": "scenario-query-s3-buckets",
+      "mode": "mcp_host",
+      "scenario": "Use AWS CLI on LocalStack to list S3 buckets.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": {
+          "calls": [{ "name": "localstack-aws-client", "required": true }],
+          "order": "any"
+        }
+      }
+    },
+    {
+      "id": "scenario-cloud-pods-operation",
+      "mode": "mcp_host",
+      "scenario": "Save the current LocalStack state in a Cloud Pod called mcptester-e2e-pod, then reset state and load that same pod back.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": {
+          "calls": [{ "name": "localstack-cloud-pods", "required": true }],
+          "order": "any"
+        }
+      }
+    },
+    {
+      "id": "discover-docs-tool",
+      "mode": "mcp_host",
+      "scenario": "Find official LocalStack documentation about IAM policy enforcement modes and summarize it.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": { "calls": [{ "name": "localstack-docs", "required": true }], "order": "any" }
+      }
+    },
+    {
+      "id": "discover-logs-analysis-tool",
+      "mode": "mcp_host",
+      "scenario": "Analyze recent LocalStack logs and give me a summary of errors and API call failures.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": { "calls": [{ "name": "localstack-logs-analysis", "required": true }], "order": "any" }
+      }
+    },
+    {
+      "id": "discover-extensions-tool",
+      "mode": "mcp_host",
+      "scenario": "Show me the available LocalStack extensions in the marketplace.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": { "calls": [{ "name": "localstack-extensions", "required": true }], "order": "any" }
+      }
+    },
+    {
+      "id": "discover-chaos-tool",
+      "mode": "mcp_host",
+      "scenario": "Inject 100ms latency and then clear all active chaos faults.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": { "calls": [{ "name": "localstack-chaos-injector", "required": true }], "order": "any" }
+      }
+    },
+    {
+      "id": "discover-iam-tool",
+      "mode": "mcp_host",
+      "scenario": "Check the current IAM enforcement mode in LocalStack and help me switch to SOFT_MODE.",
+      "mcpHostConfig": { "provider": "google", "model": "gemini-2.0-flash", "temperature": 0 },
+      "iterations": 5,
+      "expect": {
+        "toolsTriggered": { "calls": [{ "name": "localstack-iam-policy-analyzer", "required": true }], "order": "any" }
+      }
+    }
+  ]
+}
diff --git a/data/sample-terraform/main.tf b/data/sample-terraform/main.tf
@@ -0,0 +1,42 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "aws" {
+  region                      = "us-east-1"
+  access_key                  = "test" # presumably placeholder credentials for LocalStack; confirm
+  secret_key                  = "test"
+  skip_credentials_validation = true # skip_* flags: no credential validation, metadata API check, or account ID lookup
+  skip_metadata_api_check     = true
+  skip_requesting_account_id  = true
+
+  s3_use_path_style = true # path-style S3 addressing (bucket in the URL path rather than the hostname)
+}
+
+variable "name_prefix" {
+  description = "Prefix used for test resources"
+  type        = string
+  default     = "mcpjam-eval"
+}
+
+resource "aws_s3_bucket" "eval_bucket" {
+  bucket        = "${var.name_prefix}-bucket"
+  force_destroy = true
+}
+
+resource "aws_sqs_queue" "eval_queue" {
+  name = "${var.name_prefix}-queue"
+}
+
+output "bucket_name" {
+  value = aws_s3_bucket.eval_bucket.bucket
+}
+
+output "queue_name" {
+  value = aws_sqs_queue.eval_queue.name
+}
diff --git a/package.json b/package.json
@@ -11,7 +11,10 @@
     "dev": "xmcp dev",
     "start": "node dist/stdio.js",
     "format": "prettier --write .",
-    "test": "jest"
+    "test": "jest",
+    "test:mcp:direct": "yarn build && playwright test -c playwright.config.mjs tests/mcp/direct.spec.mjs",
+    "test:mcp:evals": "yarn build && playwright test -c playwright.config.mjs tests/mcp/evals-gemini.spec.mjs",
+    "test:mcp": "yarn test:mcp:direct && yarn test:mcp:evals"
   },
   "dependencies": {
     "dockerode": "^4.0.7",
@@ -20,6 +23,8 @@
     "zod": "4.3.6"
   },
   "devDependencies": {
+    "@gleanwork/mcp-server-tester": "1.0.0-beta.6",
+    "@playwright/test": "^1.58.2",
     "@types/dockerode": "^3.3.43",
     "@types/jest": "^30.0.0",
     "eslint-config-prettier": "^10.1.8",

diff --git a/playwright.config.mjs b/playwright.config.mjs
@@ -0,0 +1,44 @@
+import { defineConfig } from "@playwright/test";
+
+const mcpCommand = process.env.MCP_TEST_COMMAND || "node";
+const mcpArgs = process.env.MCP_TEST_ARGS
+  ? process.env.MCP_TEST_ARGS.split(" ").filter(Boolean)
+  : ["dist/stdio.js"];
+
+export default defineConfig({ // Playwright config for the MCP Server Tester specs under ./tests/mcp
+  testDir: "./tests/mcp",
+  timeout: 120000, // per-test timeout: 2 minutes
+  fullyParallel: false,
+  reporter: [
+    ["list"],
+    [
+      "@gleanwork/mcp-server-tester/reporters/mcpReporter",
+      {
+        outputDir: ".mcp-test-results", // report output directory (listed in .gitignore)
+        autoOpen: false,
+        historyLimit: 20,
+      },
+    ],
+  ],
+  projects: [
+    {
+      name: "localstack-mcp-server",
+      use: {
+        mcpConfig: {
+          transport: "stdio",
+          command: mcpCommand, // from MCP_TEST_COMMAND, default "node"
+          args: mcpArgs, // from MCP_TEST_ARGS, default ["dist/stdio.js"]
+          cwd: process.cwd(),
+          quiet: true,
+          connectTimeoutMs: 30000, // 30 seconds
+          requestTimeoutMs: 300000, // 5 minutes
+          callTimeoutMs: 300000, // 5 minutes
+          env: {
+            ...process.env, // forward the parent process environment
+            LOCALSTACK_AUTH_TOKEN: process.env.LOCALSTACK_AUTH_TOKEN || "", // always defined; empty string when unset
+          },
+        },
+      },
+    },
+  ],
+});
diff --git a/tests/mcp/direct.spec.mjs b/tests/mcp/direct.spec.mjs
@@ -0,0 +1,32 @@
+import { expect, test } from "@gleanwork/mcp-server-tester/fixtures/mcp";
+
+const EXPECTED_TOOLS = [ // tool names the server must expose; checked against mcp.listTools() below
+  "localstack-management",
+  "localstack-deployer",
+  "localstack-logs-analysis",
+  "localstack-iam-policy-analyzer",
+  "localstack-chaos-injector",
+  "localstack-cloud-pods",
+  "localstack-extensions",
+  "localstack-aws-client",
+  "localstack-docs",
+];
+
+test("exposes all expected LocalStack MCP tools", async ({ mcp }) => {
+  const tools = await mcp.listTools();
+  const toolNames = tools.map((tool) => tool.name);
+
+  for (const expectedTool of EXPECTED_TOOLS) {
+    expect(toolNames).toContain(expectedTool);
+  }
+});
+
+test("docs tool returns useful documentation snippets", async ({ mcp }) => {
+  const result = await mcp.callTool("localstack-docs", {
+    query: "How to start LocalStack and configure auth token",
+    limit: 2,
+  });
+
+  expect(result).not.toBeToolError();
+  expect(result).toContainToolText("LocalStack Docs");
+});
diff --git a/tests/mcp/evals-gemini.spec.mjs b/tests/mcp/evals-gemini.spec.mjs
@@ -0,0 +1,35 @@
+import { loadEvalDataset, runEvalDataset } from "@gleanwork/mcp-server-tester";
+import { test, expect } from "@gleanwork/mcp-server-tester/fixtures/mcp";
+
+function requireEnv(name) {
+  const value = process.env[name];
+  if (!value || !value.trim()) {
+    throw new Error(`Missing required environment variable: ${name}`);
+  }
+  return value;
+}
+
+test.describe("Gemini comprehensive eval", () => {
+  test.describe.configure({ timeout: 1800000 });
+
+  test("single comprehensive eval dataset passes", async ({ mcp }, testInfo) => {
+    requireEnv("GOOGLE_GENERATIVE_AI_API_KEY");
+    requireEnv("LOCALSTACK_AUTH_TOKEN");
+
+    const dataset = await loadEvalDataset("./data/evals/gemini-comprehensive.json");
+    const result = await runEvalDataset({ dataset }, { mcp, testInfo });
+    const caseResults = result.caseResults || [];
+    const passed = caseResults.filter((entry) => entry?.pass === true).length;
+    const failed = caseResults.filter((entry) => entry?.pass !== true);
+
+    if (failed.length > 0) {
+      console.error(
+        "Comprehensive eval failed cases:",
+        failed.map((entry) => entry.id)
+      );
+    }
+
+    const passRate = caseResults.length > 0 ? passed / caseResults.length : 1;
+    expect(passRate).toBeGreaterThanOrEqual(0.75);
+  });
+});