From cf64549290913b55eccf82a6f1034bd8119395ec Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sun, 7 Jun 2026 21:00:52 -0700 Subject: [PATCH 1/4] refactor(agent): call LiteLLM directly, remove access-control LLM proxy The LiteLLM HTTP proxy in access-control-service existed so the browser could reach LiteLLM without holding the master key. The agent-service is now a trusted backend that holds the key itself, so the proxy hop is redundant. - agent-service: build the OpenAI client against LITELLM_BASE_URL with the master key; serve the model list at GET /api/agents/models from LiteLLM. - access-control-service: delete LiteLLMProxyResource / LiteLLMModelsResource, their auth spec, and the now-dead RolesAllowedDynamicFeature registration; drop LLMConfig and llm.conf. - frontend: fetch models from /api/agents/models. - routing/deploy: drop the /api/models and /api/chat proxy routes (nginx, k8s gateway, dev proxy); point the agent-service deployment at LiteLLM with the litellm-master-key secret; update the enable-LLM guide. --- .../texera/service/AccessControlService.scala | 13 +- .../activity/UserActivityEventListener.scala | 4 +- .../resource/AccessControlResource.scala | 159 +--------- .../service/AccessControlServiceRunSpec.scala | 3 - .../resource/LiteLLMProxyAuthSpec.scala | 299 ------------------ agent-service/.env.example | 12 +- agent-service/src/api/backend-api.ts | 4 +- agent-service/src/config/env.ts | 6 +- agent-service/src/server.ts | 30 +- .../access-control-service-deployment.yaml | 10 - .../templates/agent-service-deployment.yaml | 12 +- bin/k8s/templates/agent-service-secret.yaml | 8 +- bin/k8s/templates/gateway-routes.yaml | 10 - bin/k8s/values-development.yaml | 4 - bin/k8s/values.yaml | 4 - bin/single-node/.env | 6 +- bin/single-node/docker-compose.yml | 2 +- bin/single-node/nginx.conf | 15 +- common/config/src/main/resources/llm.conf | 27 -- .../org/apache/texera/config/LLMConfig.scala | 29 -- docs/tutorials/guide-to-enable-llm-agent.md | 21 +- frontend/proxy.config.json | 10 - .../workspace/service/agent/agent.service.ts | 2 +- 23 files changed, 66 insertions(+), 624 deletions(-) delete mode 100644 access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala delete mode 100644 common/config/src/main/resources/llm.conf delete mode 100644 common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala diff --git a/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala b/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala index e262b80900d..bcf9d19af54 100644 --- a/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala +++ b/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala @@ -32,14 +32,8 @@ import org.apache.texera.auth.{ } import org.apache.texera.dao.SqlServer import org.apache.texera.service.activity.UserActivityEventListener -import org.apache.texera.service.resource.{ - AccessControlResource, - HealthCheckResource, - LiteLLMModelsResource, - LiteLLMProxyResource -} +import org.apache.texera.service.resource.{AccessControlResource, HealthCheckResource} import org.eclipse.jetty.server.session.SessionHandler -import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature import java.nio.file.Path class AccessControlService extends Application[AccessControlServiceConfiguration] with LazyLogging { @@ -73,8 +67,6 @@ class AccessControlService extends Application[AccessControlServiceConfiguration environment.jersey.register(classOf[HealthCheckResource]) environment.jersey.register(classOf[AccessControlResource]) - environment.jersey.register(classOf[LiteLLMProxyResource]) - environment.jersey.register(classOf[LiteLLMModelsResource]) // Register JWT authentication filter environment.jersey.register(new AuthDynamicFeature(classOf[JwtAuthFilter])) @@ -85,9 +77,6 @@ class AccessControlService extends Application[AccessControlServiceConfiguration new io.dropwizard.auth.AuthValueFactoryProvider.Binder(classOf[SessionUser]) ) - // Required for @RolesAllowed on resources to be enforced. - environment.jersey.register(classOf[RolesAllowedDynamicFeature]) - // Record USER_LAST_ACTIVE_TIME on every matched, completed request. // Lives only in this service because authenticated client sessions // contact access-control-service often enough to capture activity diff --git a/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala b/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala index e5fa785f53e..e29167ffd15 100644 --- a/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala +++ b/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala @@ -43,8 +43,8 @@ import org.glassfish.jersey.server.monitoring.{ * Lives in access-control-service because USER_LAST_ACTIVE_TIME is a * user-management concern; the assumption is that any authenticated * client session contacts this service often enough (UI navigation, - * permission checks, LiteLLM proxy) to capture activity with high - * recall, so other services do not need to mirror this listener. + * permission checks) to capture activity with high recall, so other + * services do not need to mirror this listener. */ @Provider class UserActivityEventListener(track: Integer => Unit = UserActivityTracker.markActive) diff --git a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala index 0c90a6ce31f..8a225ad4c61 100644 --- a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala +++ b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala @@ -20,14 +20,13 @@ package org.apache.texera.service.resource import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.typesafe.scalalogging.LazyLogging -import jakarta.annotation.security.{PermitAll, RolesAllowed} -import jakarta.ws.rs.client.{Client, ClientBuilder, Entity} +import jakarta.annotation.security.PermitAll import jakarta.ws.rs.core._ -import jakarta.ws.rs.{Consumes, DELETE, GET, POST, Path, Produces} +import jakarta.ws.rs.{DELETE, GET, POST, Path, Produces} import org.apache.texera.auth.JwtParser.parseToken import org.apache.texera.auth.SessionUser import org.apache.texera.auth.util.{ComputingUnitAccess, HeaderField} -import org.apache.texera.config.{GuiConfig, KubernetesConfig, LLMConfig} +import org.apache.texera.config.KubernetesConfig import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum import java.net.URLDecoder @@ -242,155 +241,3 @@ class AccessControlResource extends LazyLogging { AccessControlResource.authorize(uriInfo, headers) } } - -// Forwards chat completions to LiteLLM with the server's master key, so -// only authenticated users may call it. -@Path("/chat") -@RolesAllowed(Array("REGULAR", "ADMIN")) -@Produces(Array(MediaType.APPLICATION_JSON)) -@Consumes(Array(MediaType.APPLICATION_JSON)) -class LiteLLMProxyResource( - copilotEnabled: Boolean, - litellmBaseUrl: String, - litellmApiKey: String -) extends LazyLogging { - - // No-arg constructor for Jersey reflection. Tests use the param-ful form. - def this() = - this( - GuiConfig.guiWorkflowWorkspaceCopilotEnabled, - LLMConfig.baseUrl, - LLMConfig.masterKey - ) - - private val client: Client = ClientBuilder.newClient() - - @POST - @Path("/{path:.*}") - def proxyPost( - @Context uriInfo: UriInfo, - @Context headers: HttpHeaders, - body: String - ): Response = { - if (!copilotEnabled) { - return Response - .status(Response.Status.FORBIDDEN) - .entity(LiteLLMProxyResource.CopilotDisabledBody) - .build() - } - - // uriInfo.getPath returns "chat/completions" for /api/chat/completions - // We want to forward as "/chat/completions" to LiteLLM - val fullPath = uriInfo.getPath - val targetUrl = s"$litellmBaseUrl/$fullPath" - - logger.info(s"Proxying POST request to LiteLLM: $targetUrl") - - try { - val requestBuilder = client - .target(targetUrl) - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer $litellmApiKey") - - // Forward other relevant headers from the original request - headers.getRequestHeaders.asScala.foreach { - case (key, values) - if !key.equalsIgnoreCase("Authorization") && - !key.equalsIgnoreCase("Host") && - !key.equalsIgnoreCase("Content-Length") => - values.asScala.foreach(value => requestBuilder.header(key, value)) - case _ => // Skip Authorization, Host, and Content-Length headers - } - - val response = requestBuilder.post(Entity.json(body)) - - // Build response with same status and body from LiteLLM - val responseBody = response.readEntity(classOf[String]) - val responseBuilder = Response - .status(response.getStatus) - .entity(responseBody) - - // Forward response headers - response.getHeaders.asScala.foreach { - case (key, values) => - values.asScala.foreach(value => responseBuilder.header(key, value)) - } - - responseBuilder.build() - } catch { - case e: Exception => - logger.error(s"Error proxying request to LiteLLM: ${e.getMessage}", e) - Response - .status(Response.Status.BAD_GATEWAY) - .entity(s"""{"error": "Failed to proxy request to LiteLLM: ${e.getMessage}"}""") - .build() - } - } -} - -object LiteLLMProxyResource { - val CopilotDisabledBody: String = """{"error": "Copilot feature is disabled"}""" -} - -@Path("/models") -@RolesAllowed(Array("REGULAR", "ADMIN")) -@Produces(Array(MediaType.APPLICATION_JSON)) -class LiteLLMModelsResource( - copilotEnabled: Boolean, - litellmBaseUrl: String, - litellmApiKey: String -) extends LazyLogging { - - // No-arg constructor for Jersey reflection. Tests use the param-ful form. - def this() = - this( - GuiConfig.guiWorkflowWorkspaceCopilotEnabled, - LLMConfig.baseUrl, - LLMConfig.masterKey - ) - - private val client: Client = ClientBuilder.newClient() - - @GET - def getModels: Response = { - if (!copilotEnabled) { - return Response - .status(Response.Status.FORBIDDEN) - .entity(LiteLLMProxyResource.CopilotDisabledBody) - .build() - } - - val targetUrl = s"$litellmBaseUrl/models" - - logger.info(s"Fetching models from LiteLLM: $targetUrl") - - try { - val response = client - .target(targetUrl) - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer $litellmApiKey") - .get() - - // Build response with same status and body from LiteLLM - val responseBody = response.readEntity(classOf[String]) - val responseBuilder = Response - .status(response.getStatus) - .entity(responseBody) - - // Forward response headers - response.getHeaders.asScala.foreach { - case (key, values) => - values.asScala.foreach(value => responseBuilder.header(key, value)) - } - - responseBuilder.build() - } catch { - case e: Exception => - logger.error(s"Error fetching models from LiteLLM: ${e.getMessage}", e) - Response - .status(Response.Status.BAD_GATEWAY) - .entity(s"""{"error": "Failed to fetch models from LiteLLM: ${e.getMessage}"}""") - .build() - } - } -} diff --git a/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala b/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala index 2460d18b456..96bfd104d9a 100644 --- a/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala +++ b/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala @@ -25,7 +25,6 @@ import io.dropwizard.jetty.MutableServletContextHandler import io.dropwizard.jetty.setup.ServletEnvironment import org.apache.texera.auth.UnauthorizedExceptionMapper import org.apache.texera.service.activity.UserActivityEventListener -import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature import org.mockito.ArgumentMatchers.isA import org.mockito.Mockito.{mock, verify, when} import org.scalatest.flatspec.AnyFlatSpec @@ -47,8 +46,6 @@ class AccessControlServiceRunSpec extends AnyFlatSpec with Matchers { verify(jersey).register(isA(classOf[UserActivityEventListener])) verify(jersey).register(classOf[UnauthorizedExceptionMapper]) - // Without this feature Jersey ignores @RolesAllowed on the LiteLLM proxies. - verify(jersey).register(classOf[RolesAllowedDynamicFeature]) verify(jersey).setUrlPattern("/api/*") } } diff --git a/access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala b/access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala deleted file mode 100644 index 4d8d271c7d6..00000000000 --- a/access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.texera.service.resource - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} -import io.dropwizard.jackson.Jackson -import io.dropwizard.testing.junit5.ResourceExtension -import jakarta.ws.rs.client.Entity -import jakarta.ws.rs.core.MediaType -import org.apache.texera.auth.{JwtAuth, JwtAuthFilter, UnauthorizedExceptionMapper} -import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum -import org.apache.texera.dao.jooq.generated.tables.pojos.User -import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature -import org.scalatest.BeforeAndAfterAll -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers - -import java.net.InetSocketAddress -import java.nio.charset.StandardCharsets - -// Wires the LiteLLM proxy resources through the same Jersey auth pipeline -// production uses and fires HTTP requests with no / wrong-role / right-role -// Bearer tokens. The @RolesAllowed annotations are only enforced when -// AccessControlService registers RolesAllowedDynamicFeature; this spec is -// the regression guard that the annotation, the registration, and the -// JwtAuthFilter priority continue to compose into the expected status codes. -class LiteLLMProxyAuthSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll { - - private val testMapper: ObjectMapper = - Jackson.newObjectMapper().registerModule(DefaultScalaModule) - - // Inject copilotEnabled = true and a guaranteed-unreachable upstream so - // requests that pass the auth + role gates fall straight into the - // resource's "proxy attempt failed" branch — that 502 confirms the auth - // pipeline didn't short-circuit before the resource body ran. - private val unreachableLiteLLM = "http://127.0.0.1:1" - - private val resources: ResourceExtension = ResourceExtension - .builder() - .setMapper(testMapper) - .addProvider(classOf[JwtAuthFilter]) - .addProvider(classOf[UnauthorizedExceptionMapper]) - .addProvider(classOf[RolesAllowedDynamicFeature]) - .addResource( - new LiteLLMProxyResource( - copilotEnabled = true, - litellmBaseUrl = unreachableLiteLLM, - litellmApiKey = "test" - ) - ) - .addResource( - new LiteLLMModelsResource( - copilotEnabled = true, - litellmBaseUrl = unreachableLiteLLM, - litellmApiKey = "test" - ) - ) - .build() - - // Second resource extension with copilotEnabled = false, used to exercise - // the resource's early-exit branch that returns "Copilot feature is disabled". - private val resourcesCopilotDisabled: ResourceExtension = ResourceExtension - .builder() - .setMapper(testMapper) - .addProvider(classOf[JwtAuthFilter]) - .addProvider(classOf[UnauthorizedExceptionMapper]) - .addProvider(classOf[RolesAllowedDynamicFeature]) - .addResource( - new LiteLLMProxyResource( - copilotEnabled = false, - litellmBaseUrl = unreachableLiteLLM, - litellmApiKey = "test" - ) - ) - .addResource( - new LiteLLMModelsResource( - copilotEnabled = false, - litellmBaseUrl = unreachableLiteLLM, - litellmApiKey = "test" - ) - ) - .build() - - // An in-process HTTP server stands in for LiteLLM so the resources' success - // path (header / body / status forwarding) is exercised end-to-end without - // a network dependency. Bound to port 0 to pick any free port. - private val mockChatBody = """{"id":"mock-chat","choices":[{"message":{"content":"hi"}}]}""" - private val mockModelsBody = """{"data":[{"id":"mock-gpt"}]}""" - - private val mockLiteLLM: HttpServer = HttpServer.create(new InetSocketAddress(0), 0) - mockLiteLLM.createContext("/chat/completions", respondWith(200, mockChatBody)) - mockLiteLLM.createContext("/models", respondWith(200, mockModelsBody)) - - private def respondWith(status: Int, body: String): HttpHandler = - (exchange: HttpExchange) => { - val bytes = body.getBytes(StandardCharsets.UTF_8) - exchange.getResponseHeaders.add("Content-Type", MediaType.APPLICATION_JSON) - exchange.sendResponseHeaders(status, bytes.length.toLong) - val os = exchange.getResponseBody - try os.write(bytes) - finally os.close() - } - - private def mockBaseUrl: String = s"http://127.0.0.1:${mockLiteLLM.getAddress.getPort}" - - // Third extension: copilot on, upstream reachable. Resource is built lazily - // because litellmBaseUrl depends on the mock server's bound port. - private lazy val resourcesMockLiteLLM: ResourceExtension = ResourceExtension - .builder() - .setMapper(testMapper) - .addProvider(classOf[JwtAuthFilter]) - .addProvider(classOf[UnauthorizedExceptionMapper]) - .addProvider(classOf[RolesAllowedDynamicFeature]) - .addResource( - new LiteLLMProxyResource( - copilotEnabled = true, - litellmBaseUrl = mockBaseUrl, - litellmApiKey = "test" - ) - ) - .addResource( - new LiteLLMModelsResource( - copilotEnabled = true, - litellmBaseUrl = mockBaseUrl, - litellmApiKey = "test" - ) - ) - .build() - - override protected def beforeAll(): Unit = { - mockLiteLLM.start() - resources.before() - resourcesCopilotDisabled.before() - resourcesMockLiteLLM.before() - } - override protected def afterAll(): Unit = { - resourcesMockLiteLLM.after() - resourcesCopilotDisabled.after() - resources.after() - mockLiteLLM.stop(0) - } - - private def token(role: UserRoleEnum): String = { - val u = new User() - u.setUid(1) - u.setName("test") - u.setEmail("test@example.com") - u.setGoogleId(null) - u.setRole(role) - JwtAuth.jwtToken(JwtAuth.jwtClaims(u, expireInDays = 1)) - } - - private val chatBody = """{"model":"gpt-4o-mini","messages":[]}""" - - "POST /chat/completions without an Authorization header" should "return 401 with a Bearer challenge" in { - val response = resources - .target("/chat/completions") - .request(MediaType.APPLICATION_JSON) - .post(Entity.json(chatBody)) - response.getStatus shouldBe 401 - response.getHeaderString("WWW-Authenticate") shouldBe JwtAuthFilter.BearerChallenge - } - - it should "return 403 with an INACTIVE-role token" in { - val response = resources - .target("/chat/completions") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.INACTIVE)}") - .post(Entity.json(chatBody)) - response.getStatus shouldBe 403 - } - - it should "return 502 when the upstream LiteLLM call fails" in { - // Exercises the resource's catch branch via a connect-refused upstream. - val response = resources - .target("/chat/completions") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.REGULAR)}") - .post(Entity.json(chatBody)) - response.getStatus shouldBe 502 - } - - it should "return the resource's Copilot-disabled response when copilot is off" in { - // 403 alone is ambiguous (could be from RolesAllowedDynamicFeature); - // matching the entity to the same constant the resource emits proves the - // role check passed and the resource's own early-exit branch fired. - val response = resourcesCopilotDisabled - .target("/chat/completions") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.REGULAR)}") - .post(Entity.json(chatBody)) - response.getStatus shouldBe 403 - response.readEntity(classOf[String]) shouldBe LiteLLMProxyResource.CopilotDisabledBody - } - - "GET /models without an Authorization header" should "return 401 with a Bearer challenge" in { - val response = resources.target("/models").request(MediaType.APPLICATION_JSON).get() - response.getStatus shouldBe 401 - response.getHeaderString("WWW-Authenticate") shouldBe JwtAuthFilter.BearerChallenge - } - - it should "return 403 with an INACTIVE-role token" in { - val response = resources - .target("/models") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.INACTIVE)}") - .get() - response.getStatus shouldBe 403 - } - - it should "return 502 when the upstream LiteLLM call fails" in { - val response = resources - .target("/models") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.ADMIN)}") - .get() - response.getStatus shouldBe 502 - } - - it should "return the resource's Copilot-disabled response when copilot is off" in { - val response = resourcesCopilotDisabled - .target("/models") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.ADMIN)}") - .get() - response.getStatus shouldBe 403 - response.readEntity(classOf[String]) shouldBe LiteLLMProxyResource.CopilotDisabledBody - } - - "POST /chat/completions" should "forward the upstream response when copilot is on and upstream is reachable" in { - val response = resourcesMockLiteLLM - .target("/chat/completions") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.REGULAR)}") - .post(Entity.json(chatBody)) - response.getStatus shouldBe 200 - response.readEntity(classOf[String]) shouldBe mockChatBody - } - - "GET /models" should "forward the upstream response when copilot is on and upstream is reachable" in { - val response = resourcesMockLiteLLM - .target("/models") - .request(MediaType.APPLICATION_JSON) - .header("Authorization", s"Bearer ${token(UserRoleEnum.ADMIN)}") - .get() - response.getStatus shouldBe 200 - response.readEntity(classOf[String]) shouldBe mockModelsBody - } - - // Regression guard for the no-arg auxiliary constructor that Jersey - // reflection picks at production startup. Jersey resolves constructors in - // descending parameter count and skips any whose parameters are not - // @Context / HK2 injectable; LiteLLMProxyResource's 3-arg ctor takes plain - // Boolean / String values, so the no-arg form must exist and be picked. - // addResource(classOf[...]) (vs. addResource(new ...)) exercises that path. - "Jersey reflection" should "instantiate both LiteLLM resources via their no-arg constructors" in { - val reflective = ResourceExtension - .builder() - .setMapper(testMapper) - .addProvider(classOf[JwtAuthFilter]) - .addProvider(classOf[UnauthorizedExceptionMapper]) - .addProvider(classOf[RolesAllowedDynamicFeature]) - .addResource(classOf[LiteLLMProxyResource]) - .addResource(classOf[LiteLLMModelsResource]) - .build() - reflective.before() - try { - val chat = reflective.target("/chat/completions").request(MediaType.APPLICATION_JSON).get() - // Unauthenticated GET on the POST-only chat path: we just need any - // response that proves Jersey wired the resource (4xx is fine; an - // instantiation failure surfaces as 500 or a test setup error). - chat.getStatus should (be >= 400 and be < 500) - - val models = reflective.target("/models").request(MediaType.APPLICATION_JSON).get() - models.getStatus shouldBe 401 - } finally { - reflective.after() - } - } -} diff --git a/agent-service/.env.example b/agent-service/.env.example index 605f157ca0c..d7a45e41f59 100644 --- a/agent-service/.env.example +++ b/agent-service/.env.example @@ -8,12 +8,12 @@ TEXERA_SERVICE_LOG_LEVEL=INFO # Human-readable dev logs via pino-pretty. LOG_PRETTY=true -# LLM_API_KEY authenticates this service to the gateway — NOT the -# upstream provider key (OpenAI / Anthropic / etc.), which is -# configured inside the gateway. "dummy" works when the gateway -# does not enforce a key (e.g. LiteLLM with no master_key). -LLM_API_KEY=dummy -LLM_ENDPOINT=http://localhost:9096 +# The agent service is a trusted backend, so it calls the LiteLLM gateway +# directly with the master key. LITELLM_MASTER_KEY is LiteLLM's master key +# (NOT an upstream provider key); "dummy" works when LiteLLM runs without a +# master_key. LITELLM_BASE_URL points at the LiteLLM proxy. +LITELLM_MASTER_KEY=dummy +LITELLM_BASE_URL=http://localhost:4000 # Texera backend services TEXERA_DASHBOARD_SERVICE_ENDPOINT=http://localhost:8080 diff --git a/agent-service/src/api/backend-api.ts b/agent-service/src/api/backend-api.ts index ffd2c59433f..d19eda5bc3f 100644 --- a/agent-service/src/api/backend-api.ts +++ b/agent-service/src/api/backend-api.ts @@ -21,14 +21,14 @@ import { env } from "../config/env"; interface BackendConfig { apiEndpoint: string; - modelsEndpoint: string; + litellmBaseUrl: string; compileEndpoint: string; executionEndpoint: string; } const currentConfig: BackendConfig = { apiEndpoint: env.TEXERA_DASHBOARD_SERVICE_ENDPOINT, - modelsEndpoint: env.LLM_ENDPOINT, + litellmBaseUrl: env.LITELLM_BASE_URL, compileEndpoint: env.WORKFLOW_COMPILING_SERVICE_ENDPOINT, executionEndpoint: env.WORKFLOW_EXECUTION_SERVICE_ENDPOINT, }; diff --git a/agent-service/src/config/env.ts b/agent-service/src/config/env.ts index 16a25b9be77..d8828d0647d 100644 --- a/agent-service/src/config/env.ts +++ b/agent-service/src/config/env.ts @@ -22,7 +22,9 @@ import { z } from "zod"; const EnvSchema = z.object({ PORT: z.coerce.number().default(3001), API_PREFIX: z.string().default("/api"), - LLM_API_KEY: z.string().default("dummy"), + // Master key for the LiteLLM gateway. The agent service is a trusted backend, + // so it holds this secret and calls LiteLLM directly (no access-control proxy). + LITELLM_MASTER_KEY: z.string().default("dummy"), TEXERA_SERVICE_LOG_LEVEL: z .enum(["ERROR", "WARN", "INFO", "DEBUG"]) .transform(v => v.toLowerCase() as "error" | "warn" | "info" | "debug") @@ -30,7 +32,7 @@ const EnvSchema = z.object({ LOG_PRETTY: z.coerce.boolean().default(false), TEXERA_DASHBOARD_SERVICE_ENDPOINT: z.string().url().default("http://localhost:8080"), - LLM_ENDPOINT: z.string().url().default("http://localhost:9096"), + LITELLM_BASE_URL: z.string().url().default("http://localhost:4000"), WORKFLOW_COMPILING_SERVICE_ENDPOINT: z.string().url().default("http://localhost:9090"), WORKFLOW_EXECUTION_SERVICE_ENDPOINT: z.string().url().default("http://localhost:8085"), EXECUTION_ENDPOINT_TEMPLATE: z.string().optional(), diff --git a/agent-service/src/server.ts b/agent-service/src/server.ts index d5eeae82c9b..2b537d09105 100644 --- a/agent-service/src/server.ts +++ b/agent-service/src/server.ts @@ -52,9 +52,11 @@ async function createAgentInstance( const agentId = `agent-${++agentCounter}`; const config = getBackendConfig(); + // The agent service is a trusted backend, so it calls the LiteLLM gateway + // directly with the master key (no access-control proxy hop). const openai = createOpenAI({ - baseURL: `${config.modelsEndpoint}/api`, - apiKey: env.LLM_API_KEY, + baseURL: config.litellmBaseUrl, + apiKey: env.LITELLM_MASTER_KEY, }); // Reasoning effort variants are configured as separate model entries in litellm-config.yaml @@ -165,6 +167,26 @@ const agentsRouter = new Elysia({ prefix: "/agents" }) return { agents: agentList }; }) + // Lists the models available on the LiteLLM gateway. Previously the frontend + // hit a LiteLLM proxy on the access-control-service; the agent service now + // owns this since it already holds the master key and talks to LiteLLM. + .get("/models", async ({ set }) => { + const { litellmBaseUrl } = getBackendConfig(); + try { + const response = await fetch(`${litellmBaseUrl}/models`, { + headers: { Authorization: `Bearer ${env.LITELLM_MASTER_KEY}` }, + }); + if (!response.ok) { + set.status = 502; + return { error: `Failed to fetch models from LiteLLM: ${response.status} ${response.statusText}` }; + } + return await response.json(); + } catch (error) { + set.status = 502; + return { error: `Failed to fetch models from LiteLLM: ${error instanceof Error ? error.message : String(error)}` }; + } + }) + .post( "/", async ({ body }) => { @@ -630,8 +652,8 @@ function printStartupMessage(app: ReturnType) { console.log(""); console.log("Environment:"); - console.log(` LLM_API_KEY: ${env.LLM_API_KEY === "dummy" ? "dummy (default)" : "set"}`); - console.log(` LLM_ENDPOINT: ${getBackendConfig().modelsEndpoint}`); + console.log(` LITELLM_MASTER_KEY: ${env.LITELLM_MASTER_KEY === "dummy" ? "dummy (default)" : "set"}`); + console.log(` LITELLM_BASE_URL: ${getBackendConfig().litellmBaseUrl}`); console.log(` WORKFLOW_COMPILING_SERVICE_ENDPOINT: ${getBackendConfig().compileEndpoint}`); console.log(` TEXERA_DASHBOARD_SERVICE_ENDPOINT: ${getBackendConfig().apiEndpoint}`); console.log(""); diff --git a/bin/k8s/templates/access-control-service-deployment.yaml b/bin/k8s/templates/access-control-service-deployment.yaml index 99713e70713..d3c1ac513c4 100644 --- a/bin/k8s/templates/access-control-service-deployment.yaml +++ b/bin/k8s/templates/access-control-service-deployment.yaml @@ -50,16 +50,6 @@ spec: value: {{ .Values.workflowComputingUnitPool.name }} - name: KUBERNETES_COMPUTE_UNIT_POOL_NAMESPACE value: {{ .Values.workflowComputingUnitPool.namespace }} - {{- if .Values.litellm.enabled }} - # LLM gateway used to serve /api/chat and /api/models to the agent service. - - name: LITELLM_BASE_URL - value: http://{{ .Values.litellm.name }}-svc:{{ .Values.litellm.service.port }} - - name: LITELLM_MASTER_KEY - valueFrom: - secretKeyRef: - name: {{ .Release.Name }}-agent-service-secret - key: litellm-master-key - {{- end }} {{- range .Values.texeraEnvVars }} - name: {{ .name }} value: "{{ .value }}" diff --git a/bin/k8s/templates/agent-service-deployment.yaml b/bin/k8s/templates/agent-service-deployment.yaml index 5437cd7d9cd..98a38f2408a 100644 --- a/bin/k8s/templates/agent-service-deployment.yaml +++ b/bin/k8s/templates/agent-service-deployment.yaml @@ -46,21 +46,21 @@ spec: # Dashboard service: workflow CRUD + operator metadata. - name: TEXERA_DASHBOARD_SERVICE_ENDPOINT value: http://{{ .Values.webserver.name }}-svc:{{ .Values.webserver.service.port }} - # LLM gateway: access-control-service serves /api/chat and /api/models, - # forwarding to LiteLLM (mirrors the single-node nginx routing). - - name: LLM_ENDPOINT - value: http://{{ .Values.accessControlService.name }}-svc:{{ .Values.accessControlService.service.port }} + # The agent service is a trusted backend, so it calls LiteLLM + # directly with the master key (no access-control proxy hop). + - name: LITELLM_BASE_URL + value: http://{{ .Values.litellm.name }}-svc:{{ .Values.litellm.service.port }} - name: WORKFLOW_COMPILING_SERVICE_ENDPOINT value: http://{{ .Values.workflowCompilingService.name }}-svc:{{ .Values.workflowCompilingService.service.port }} # Per-computing-unit execution endpoint; "{cuid}" is substituted with the # computing unit id at request time. - name: EXECUTION_ENDPOINT_TEMPLATE value: http://computing-unit-{cuid}.{{ .Values.workflowComputingUnitPool.name }}-svc.{{ .Values.workflowComputingUnitPool.namespace }}.svc.cluster.local:{{ .Values.workflowComputingUnitPool.service.port }} - - name: LLM_API_KEY + - name: LITELLM_MASTER_KEY valueFrom: secretKeyRef: name: {{ .Release.Name }}-agent-service-secret - key: llm-api-key + key: litellm-master-key # The service loads operator metadata from the dashboard service on # startup, so gate readiness on its health endpoint before the gateway # routes traffic here. /api/healthcheck needs no auth. diff --git a/bin/k8s/templates/agent-service-secret.yaml b/bin/k8s/templates/agent-service-secret.yaml index 61746a0aeb8..32ea2e4caa0 100644 --- a/bin/k8s/templates/agent-service-secret.yaml +++ b/bin/k8s/templates/agent-service-secret.yaml @@ -15,9 +15,10 @@ # specific language governing permissions and limitations # under the License. -# Shared secret for the agent service and LiteLLM. Holds the agent's gateway -# key, LiteLLM's master key, and the upstream provider API keys. Provide real -# values via `--set` or a values override file; do not commit them. +# Shared secret for the agent service and LiteLLM. Holds LiteLLM's master key +# (used by both the agent service and LiteLLM) and the upstream provider API +# keys. Provide real values via `--set` or a values override file; do not +# commit them. {{- if or .Values.agentService.enabled .Values.litellm.enabled }} apiVersion: v1 kind: Secret @@ -26,7 +27,6 @@ metadata: namespace: {{ .Release.Namespace }} type: Opaque stringData: - llm-api-key: "{{ .Values.agentService.env.llmApiKey }}" litellm-master-key: "{{ .Values.litellm.masterKey }}" {{- range $key, $value := .Values.litellm.providerApiKeys }} {{ $key }}: "{{ $value }}" diff --git a/bin/k8s/templates/gateway-routes.yaml b/bin/k8s/templates/gateway-routes.yaml index ac8096aa212..5fdb3da7c8d 100644 --- a/bin/k8s/templates/gateway-routes.yaml +++ b/bin/k8s/templates/gateway-routes.yaml @@ -66,16 +66,6 @@ spec: backendRefs: - name: config-service-svc port: 9094 - - matches: - - path: - type: PathPrefix - value: /api/models - - path: - type: PathPrefix - value: /api/chat - backendRefs: - - name: access-control-service-svc - port: 9096 - matches: - path: type: PathPrefix diff --git a/bin/k8s/values-development.yaml b/bin/k8s/values-development.yaml index 5537b39acc3..dc7078e4688 100644 --- a/bin/k8s/values-development.yaml +++ b/bin/k8s/values-development.yaml @@ -233,10 +233,6 @@ agentService: service: type: ClusterIP port: 3001 - env: - # Authenticates the agent service to the in-cluster LLM gateway - # (access-control-service / LiteLLM), not to the upstream provider. - llmApiKey: "dummy" litellm: enabled: true diff --git a/bin/k8s/values.yaml b/bin/k8s/values.yaml index 32687a7ac5e..2974c27c882 100644 --- a/bin/k8s/values.yaml +++ b/bin/k8s/values.yaml @@ -215,10 +215,6 @@ agentService: service: type: ClusterIP port: 3001 - env: - # Authenticates the agent service to the in-cluster LLM gateway - # (access-control-service / LiteLLM), not to the upstream provider. - llmApiKey: "dummy" litellm: enabled: true diff --git a/bin/single-node/.env b/bin/single-node/.env index 54aa2f5b322..bf3271a0021 100644 --- a/bin/single-node/.env +++ b/bin/single-node/.env @@ -87,13 +87,13 @@ FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT=http://file-service:9092/api/da # Toggles the texera agent panel; set false to hide it in the GUI. GUI_WORKFLOW_WORKSPACE_COPILOT_ENABLED=true -# Litellm key and URL for internal LLM traffic +# LiteLLM master key + URL for internal LLM traffic. Used by the litellm +# service and by the agent-service, which calls LiteLLM directly for chat +# completions and the model list. LITELLM_MASTER_KEY=sk-texera-internal-do-not-share LITELLM_BASE_URL=http://litellm:4000 # Configurations for agent-service to connect to Texera's services -LLM_ENDPOINT=http://nginx:8080 -LLM_API_KEY=dummy TEXERA_DASHBOARD_SERVICE_ENDPOINT=http://dashboard-service:8080 WORKFLOW_COMPILING_SERVICE_ENDPOINT=http://workflow-compiling-service:9090 WORKFLOW_EXECUTION_SERVICE_ENDPOINT=http://workflow-runtime-coordinator-service:8085 diff --git a/bin/single-node/docker-compose.yml b/bin/single-node/docker-compose.yml index e26fe8aa957..9638769d22d 100644 --- a/bin/single-node/docker-compose.yml +++ b/bin/single-node/docker-compose.yml @@ -299,7 +299,7 @@ services: timeout: 3s retries: 10 - # AccessControlService handles user permissions and proxies LLM API traffic + # AccessControlService handles user permissions and access control access-control-service: image: ${IMAGE_REGISTRY:-ghcr.io/apache}/texera-access-control-service:${IMAGE_TAG:-latest} container_name: access-control-service diff --git a/bin/single-node/nginx.conf b/bin/single-node/nginx.conf index 515a7016da0..70947c00792 100644 --- a/bin/single-node/nginx.conf +++ b/bin/single-node/nginx.conf @@ -57,19 +57,8 @@ http { proxy_set_header X-Real-IP $remote_addr; } - # LLM API traffic goes through access-control-service, which then forwards to LiteLLM. - location = /api/models { - proxy_pass http://access-control-service:9096; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - } - - location /api/chat/ { - proxy_pass http://access-control-service:9096; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - } - + # LLM traffic (chat + model list) is handled by the agent-service, which + # talks to LiteLLM directly; /api/agents below already routes there. location /api/agents { proxy_pass http://agent-service:3001; proxy_http_version 1.1; diff --git a/common/config/src/main/resources/llm.conf b/common/config/src/main/resources/llm.conf deleted file mode 100644 index 23b9360cdab..00000000000 --- a/common/config/src/main/resources/llm.conf +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# LLM Configuration -llm { - # Base URL for LiteLLM service - base-url = "http://0.0.0.0:4000" - base-url = ${?LITELLM_BASE_URL} - - # Master key for LiteLLM authentication - master-key = "" - master-key = ${?LITELLM_MASTER_KEY} -} diff --git a/common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala b/common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala deleted file mode 100644 index a85b734bad6..00000000000 --- a/common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.texera.config - -import com.typesafe.config.{Config, ConfigFactory} - -object LLMConfig { - private val conf: Config = ConfigFactory.parseResources("llm.conf").resolve() - - // LLM Service Configuration - val baseUrl: String = conf.getString("llm.base-url") - val masterKey: String = conf.getString("llm.master-key") -} diff --git a/docs/tutorials/guide-to-enable-llm-agent.md b/docs/tutorials/guide-to-enable-llm-agent.md index efa2622b6fa..65cfd86500a 100644 --- a/docs/tutorials/guide-to-enable-llm-agent.md +++ b/docs/tutorials/guide-to-enable-llm-agent.md @@ -63,21 +63,10 @@ Modify `common/config/src/main/resources/gui.conf` to enable the agent feature: ## Step 5: Configure LiteLLM Connection (Optional) -The `AccessControlService` acts as a gateway between the frontend and LiteLLM. If LiteLLM is running on a different host or port, modify `common/config/src/main/resources/llm.conf`: - -```diff - llm { - # Base URL for LiteLLM service -- base-url = "http://0.0.0.0:4000" -+ base-url = "http://your-litellm-host:4000" - - # Master key for LiteLLM authentication -- master-key = "" -+ master-key = "your-master-key" - } -``` - -Alternatively, set environment variables: +The `AgentService` is a trusted backend that calls LiteLLM directly — for both +chat completions and the model list shown in the agent panel — using LiteLLM's +master key. If LiteLLM is running on a different host or port, or is protected +by a master key, configure the agent service via environment variables: ```bash export LITELLM_BASE_URL=http://your-litellm-host:4000 @@ -86,7 +75,7 @@ export LITELLM_MASTER_KEY=your-master-key ## Step 6: Start Texera Services -Start the **all** Texera micro services, including the `AccessControlService`. +Start **all** the Texera micro services, including the `AgentService`. ## Done! diff --git a/frontend/proxy.config.json b/frontend/proxy.config.json index f68602e0714..78be3483623 100755 --- a/frontend/proxy.config.json +++ b/frontend/proxy.config.json @@ -10,16 +10,6 @@ "secure": false, "changeOrigin": true }, -"/api/models": { - "target": "http://localhost:9096", - "secure": false, - "changeOrigin": true - }, - "/api/chat/completion": { - "target": "http://localhost:9096", - "secure": false, - "changeOrigin": true - }, "/api/compile": { "target": "http://localhost:9090", "secure": false, diff --git a/frontend/src/app/workspace/service/agent/agent.service.ts b/frontend/src/app/workspace/service/agent/agent.service.ts index 2009734030b..2c922f5e085 100644 --- a/frontend/src/app/workspace/service/agent/agent.service.ts +++ b/frontend/src/app/workspace/service/agent/agent.service.ts @@ -864,7 +864,7 @@ export class AgentService { */ public fetchModelTypes(): Observable { if (!this.modelTypes$) { - this.modelTypes$ = this.http.get(`${AppSettings.getApiEndpoint()}/models`).pipe( + this.modelTypes$ = this.http.get(`${AppSettings.getApiEndpoint()}/agents/models`).pipe( map(response => response.data.map((model: LiteLLMModel) => ({ id: model.id, From b8f655a09ac1851ba06ec2b88612a5e34b3d76e4 Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sun, 7 Jun 2026 21:19:20 -0700 Subject: [PATCH 2/4] style(agent): format server.ts models endpoint with prettier --- agent-service/src/server.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/agent-service/src/server.ts b/agent-service/src/server.ts index 2b537d09105..2a328bb7c1d 100644 --- a/agent-service/src/server.ts +++ b/agent-service/src/server.ts @@ -183,7 +183,9 @@ const agentsRouter = new Elysia({ prefix: "/agents" }) return await response.json(); } catch (error) { set.status = 502; - return { error: `Failed to fetch models from LiteLLM: ${error instanceof Error ? error.message : String(error)}` }; + return { + error: `Failed to fetch models from LiteLLM: ${error instanceof Error ? error.message : String(error)}`, + }; } }) From e9d8e174e2d3b237d91beef1796b252927237e97 Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sun, 7 Jun 2026 21:23:16 -0700 Subject: [PATCH 3/4] test(agent): cover GET /api/agents/models success and LiteLLM failure paths --- agent-service/src/server.test.ts | 69 +++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/agent-service/src/server.test.ts b/agent-service/src/server.test.ts index 0f618e599c2..5eb15b6e248 100644 --- a/agent-service/src/server.test.ts +++ b/agent-service/src/server.test.ts @@ -17,7 +17,7 @@ * under the License. */ -import { beforeEach, describe, expect, test } from "bun:test"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { buildApp, _resetAgentStoreForTests } from "./server"; import { env } from "./config/env"; @@ -200,6 +200,73 @@ describe("Agent control routes", () => { }); }); +describe(`GET ${API}/agents/models`, () => { + // The endpoint fetches the model list from LiteLLM directly (the agent + // service holds the master key). Stub global fetch so no real LiteLLM is + // needed, and capture the outgoing request to assert URL + auth header. + const realFetch = globalThis.fetch; + let calls: Array<{ url: string; authorization: string | null }>; + + function mockLiteLLM(responder: (reqUrl: string) => Response | Promise): void { + globalThis.fetch = (async ( + input: Parameters[0], + init?: Parameters[1] + ): Promise => { + const reqUrl = String(input); + const authorization = new Headers(init?.headers).get("Authorization"); + calls.push({ url: reqUrl, authorization }); + return responder(reqUrl); + }) as typeof globalThis.fetch; + } + + beforeEach(() => { + calls = []; + }); + + afterEach(() => { + globalThis.fetch = realFetch; + }); + + test("forwards the LiteLLM model list and calls it directly with the master key", async () => { + const upstream = { + data: [{ id: "gpt-5-mini", object: "model", created: 0, owned_by: "openai" }], + object: "list", + }; + mockLiteLLM( + () => new Response(JSON.stringify(upstream), { status: 200, headers: { "Content-Type": "application/json" } }) + ); + + const res = await getJson(`${API}/agents/models`); + expect(res.status).toBe(200); + expect(await readJson(res)).toEqual(upstream); + + // One call, straight to /models, carrying the master key. + expect(calls).toHaveLength(1); + expect(calls[0].url).toBe(`${env.LITELLM_BASE_URL}/models`); + expect(calls[0].authorization).toBe(`Bearer ${env.LITELLM_MASTER_KEY}`); + }); + + test("returns 502 when LiteLLM responds with a non-OK status", async () => { + mockLiteLLM(() => new Response("unauthorized", { status: 401 })); + + const res = await getJson(`${API}/agents/models`); + expect(res.status).toBe(502); + const body = await readJson<{ error: string }>(res); + expect(body.error).toContain("Failed to fetch models from LiteLLM"); + }); + + test("returns 502 when LiteLLM is unreachable", async () => { + mockLiteLLM(() => { + throw new Error("connect ECONNREFUSED 127.0.0.1:4000"); + }); + + const res = await getJson(`${API}/agents/models`); + expect(res.status).toBe(502); + const body = await readJson<{ error: string }>(res); + expect(body.error).toContain("Failed to fetch models from LiteLLM"); + }); +}); + describe(`PATCH ${API}/agents/:id/settings`, () => { test("updates settings and returns the new values", async () => { const created = await readJson<{ id: string }>(await postJson(`${API}/agents`, { modelType: "m" })); From e291fd5a68b197a81f4db729adf33527e89ff84b Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Mon, 8 Jun 2026 00:28:58 -0700 Subject: [PATCH 4/4] feat(agent): authenticate agent-service requests via JWT + gateway ext_authz Phase 1 authentication for the agent service (see #5561): - agent-service: real HS256 JWT verification using the shared secret read from auth.conf (env AUTH_JWT_SECRET); a guard rejects unauthenticated REST (Bearer) and WebSocket (access-token query) requests. auth.conf is bundled into the image. - access-control-service: authorize() gains an /api/agents branch that verifies the JWT and requires REGULAR/ADMIN, returning the trusted x-user-* headers (allow-all per-agent for now; per-agent ownership is deferred to #5302). - gateways: nginx auth_request (single-node) and an Envoy SecurityPolicy (k8s) route /api/agents through the access-control-service; the agent-service deployment now receives AUTH_JWT_SECRET. - frontend: attach the JWT to every agent call (Bearer on REST, access-token on the /react WebSocket). - tests: agent-service jwt.test.ts + server.test.ts guard cases; access-control AgentAccessAuthSpec. --- .../resource/AccessControlResource.scala | 60 +++++++- .../resource/AgentAccessAuthSpec.scala | 94 +++++++++++++ agent-service/src/api/auth-api.ts | 42 ++---- agent-service/src/config/env.ts | 7 + agent-service/src/config/jwt.test.ts | 94 +++++++++++++ agent-service/src/config/jwt.ts | 131 ++++++++++++++++++ agent-service/src/server.test.ts | 75 +++++++++- agent-service/src/server.ts | 22 ++- bin/agent-service.dockerfile | 4 + .../templates/agent-service-deployment.yaml | 8 ++ .../agent-service-security-policy.yaml | 43 ++++++ bin/single-node/nginx.conf | 22 +++ .../workspace/service/agent/agent.service.ts | 51 ++++--- 13 files changed, 599 insertions(+), 54 deletions(-) create mode 100644 access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala create mode 100644 agent-service/src/config/jwt.test.ts create mode 100644 agent-service/src/config/jwt.ts create mode 100644 bin/k8s/templates/agent-service-security-policy.yaml diff --git a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala index 8a225ad4c61..d279b9cf386 100644 --- a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala +++ b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala @@ -27,7 +27,7 @@ import org.apache.texera.auth.JwtParser.parseToken import org.apache.texera.auth.SessionUser import org.apache.texera.auth.util.{ComputingUnitAccess, HeaderField} import org.apache.texera.config.KubernetesConfig -import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum +import org.apache.texera.dao.jooq.generated.enums.{PrivilegeEnum, UserRoleEnum} import java.net.URLDecoder import java.nio.charset.StandardCharsets @@ -44,6 +44,8 @@ object AccessControlResource extends LazyLogging { private val apiExecutionsStats: Regex = """.*/api/executions/[0-9]+/stats/[0-9]+.*""".r private val apiExecutionsResultExport: Regex = """.*/api/executions/result/export.*""".r private val pveRoute: Regex = """^/?(?:auth/)?(?:api/|wsapi/)?pve(?:/.*)?$""".r + // Agent service: authenticate any /api/agents request (Phase 1 — see #5561). + private val apiAgents: Regex = """.*/api/agents.*""".r // Path patterns whose cuid lives in the URL path rather than the query string. private val pvePvesCuidPath: Regex = """^/?(?:auth/)?(?:api/|wsapi/)?pve/pves/([0-9]+)$""".r private val pvePackagesCuidPath: Regex = @@ -68,12 +70,68 @@ object AccessControlResource extends LazyLogging { case wsapiWorkflowWebsocket() | apiExecutionsStats() | apiExecutionsResultExport() | pveRoute() => checkComputingUnitAccess(uriInfo, headers, bodyOpt) + case apiAgents() => + checkAgentAccess(uriInfo, headers, bodyOpt) case _ => logger.warn(s"No authorization logic for path: $path. Denying access.") Response.status(Response.Status.FORBIDDEN).build() } } + // Extract the bearer token from the access-token query param, the + // Authorization header, or a "token" field in the body (in that order). + private def extractBearerToken( + uriInfo: UriInfo, + headers: HttpHeaders, + bodyOpt: Option[String] + ): String = { + val qToken = Option(uriInfo.getQueryParameters().getFirst("access-token")) + .map(_.trim) + .filter(_.nonEmpty) + val hToken = Option(headers.getRequestHeader("Authorization")) + .flatMap(_.asScala.headOption) + .map(_.replaceFirst("(?i)^Bearer\\s+", "")) // case-insensitive "Bearer " + .map(_.trim) + .filter(_.nonEmpty) + val bToken = bodyOpt.flatMap(extractTokenFromBody) + qToken.orElse(hToken).orElse(bToken).getOrElse("") + } + + // Phase 1 agent authorization: authenticate the JWT and require a + // REGULAR/ADMIN role. Any such user may reach any agent for now; per-agent + // ownership is deferred (see #5302 / #5561). On success, forward the user + // identity headers so the agent service can trust them. + private def checkAgentAccess( + uriInfo: UriInfo, + headers: HttpHeaders, + bodyOpt: Option[String] + ): Response = { + val token = extractBearerToken(uriInfo, headers, bodyOpt) + val userSession: Optional[SessionUser] = + try parseToken(token) + catch { + case e: Exception => + logger.error(s"Failed parsing token for agent request: $e") + Optional.empty() + } + + if (userSession.isEmpty) { + return Response.status(Response.Status.UNAUTHORIZED).build() + } + + val user = userSession.get() + if (!(user.isRoleOf(UserRoleEnum.REGULAR) || user.isRoleOf(UserRoleEnum.ADMIN))) { + return Response.status(Response.Status.FORBIDDEN).build() + } + + Response + .ok() + .header(HeaderField.UserId, user.getUid.toString) + .header(HeaderField.UserName, user.getName) + .header(HeaderField.UserEmail, user.getEmail) + .build() + } + private def checkComputingUnitAccess( uriInfo: UriInfo, headers: HttpHeaders, diff --git a/access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala b/access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala new file mode 100644 index 00000000000..b723f6837a6 --- /dev/null +++ b/access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.service.resource + +import jakarta.ws.rs.core.{HttpHeaders, MultivaluedHashMap, Response, UriInfo} +import org.apache.texera.auth.JwtAuth +import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum +import org.apache.texera.dao.jooq.generated.tables.pojos.User +import org.mockito.Mockito.{mock, when} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.net.URI +import java.util.Collections + +/** + * Exercises the `/api/agents` branch of [[AccessControlResource.authorize]] — + * the Phase 1 ext_authz gate that authenticates the JWT and requires a + * REGULAR/ADMIN role. Tokens are minted with the same [[JwtAuth]] the rest of + * Texera uses, so this is a real signature round-trip against auth.conf. + */ +class AgentAccessAuthSpec extends AnyFlatSpec with Matchers { + + private def token(role: UserRoleEnum): String = { + val u = new User() + u.setUid(7) + u.setName("agent-user") + u.setEmail("agent-user@example.com") + u.setGoogleId(null) + u.setRole(role) + JwtAuth.jwtToken(JwtAuth.jwtClaims(u, expireInDays = 1)) + } + + private def authorize(path: String, authHeader: Option[String]): Response = { + val uriInfo = mock(classOf[UriInfo]) + when(uriInfo.getPath).thenReturn(path) + when(uriInfo.getRequestUri).thenReturn(URI.create(s"http://localhost/$path")) + when(uriInfo.getQueryParameters()).thenReturn(new MultivaluedHashMap[String, String]()) + + val headers = mock(classOf[HttpHeaders]) + when(headers.getRequestHeaders).thenReturn(new MultivaluedHashMap[String, String]()) + when(headers.getRequestHeader("Authorization")).thenReturn( + authHeader.map((h: String) => Collections.singletonList(h)).orNull + ) + + AccessControlResource.authorize(uriInfo, headers) + } + + private val agentPath = "auth/api/agents/agent-1" + + "authorize on /api/agents" should "return 200 for a REGULAR user" in { + authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.REGULAR)}")).getStatus shouldBe 200 + } + + it should "return 200 for an ADMIN user" in { + authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.ADMIN)}")).getStatus shouldBe 200 + } + + it should "forward the user identity headers on success" in { + val resp = authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.REGULAR)}")) + resp.getHeaderString("x-user-id") shouldBe "7" + resp.getHeaderString("x-user-name") shouldBe "agent-user" + resp.getHeaderString("x-user-email") shouldBe "agent-user@example.com" + } + + it should "return 401 when no token is present" in { + authorize(agentPath, None).getStatus shouldBe 401 + } + + it should "return 401 for a malformed / mis-signed token" in { + authorize(agentPath, Some("Bearer not-a-real-jwt")).getStatus shouldBe 401 + } + + it should "return 403 for an INACTIVE-role user" in { + authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.INACTIVE)}")).getStatus shouldBe 403 + } +} diff --git a/agent-service/src/api/auth-api.ts b/agent-service/src/api/auth-api.ts index 087f93ac46f..02be1e0c1a1 100644 --- a/agent-service/src/api/auth-api.ts +++ b/agent-service/src/api/auth-api.ts @@ -18,43 +18,29 @@ */ import type { UserInfo } from "../types/agent"; +import { verifyToken } from "../config/jwt"; export type { UserInfo } from "../types/agent"; +export { verifyToken } from "../config/jwt"; -function decodeJWT(token: string): any { - try { - const parts = token.split("."); - if (parts.length !== 3) { - throw new Error("Invalid JWT format"); - } - return JSON.parse(Buffer.from(parts[1], "base64").toString("utf-8")); - } catch (error) { - throw new Error(`Failed to decode JWT: ${error}`); - } -} - +/** Verify the token's signature + expiry and return its user claims. Throws on + * an invalid/expired/mis-signed token. */ export function extractUserFromToken(token: string): UserInfo { - const payload = decodeJWT(token); + const user = verifyToken(token); + if (!user) { + throw new Error("Invalid or expired token"); + } return { - uid: payload.userId, - name: payload.sub, - email: payload.email || "", - role: payload.role || "REGULAR", + uid: user.uid, + name: user.name, + email: user.email, + role: user.role, }; } -function isTokenExpired(token: string): boolean { - try { - const payload = decodeJWT(token); - if (!payload.exp) return false; - return Date.now() >= payload.exp * 1000; - } catch { - return true; - } -} - +/** True only when the token is genuinely valid (signature + expiry verified). */ export function validateToken(token: string): boolean { - return !isTokenExpired(token); + return verifyToken(token) !== null; } export function createAuthHeaders(token: string): Record { diff --git a/agent-service/src/config/env.ts b/agent-service/src/config/env.ts index d8828d0647d..5066901e3bf 100644 --- a/agent-service/src/config/env.ts +++ b/agent-service/src/config/env.ts @@ -36,6 +36,13 @@ const EnvSchema = z.object({ WORKFLOW_COMPILING_SERVICE_ENDPOINT: z.string().url().default("http://localhost:9090"), WORKFLOW_EXECUTION_SERVICE_ENDPOINT: z.string().url().default("http://localhost:8085"), EXECUTION_ENDPOINT_TEMPLATE: z.string().optional(), + + // Shared JWT secret (HS256). Overrides the default in auth.conf, mirroring + // common/config AuthConfig. When unset, the secret is read from auth.conf. + AUTH_JWT_SECRET: z.string().optional(), + // Path to auth.conf; defaults are probed in config/jwt.ts (bundled in the + // image, or the repo path in local dev). + AUTH_CONF_PATH: z.string().optional(), }); export const env = EnvSchema.parse(process.env); diff --git a/agent-service/src/config/jwt.test.ts b/agent-service/src/config/jwt.test.ts new file mode 100644 index 00000000000..244b79ad9ba --- /dev/null +++ b/agent-service/src/config/jwt.test.ts @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { describe, expect, test } from "bun:test"; +import { createHmac } from "node:crypto"; +import { JWT_SECRET, verifyToken } from "./jwt"; + +function b64url(input: string | Buffer): string { + return Buffer.from(input).toString("base64url"); +} + +// Mints an HS256 token the same way org.apache.texera.auth.JwtAuth does. +function sign(payload: Record, opts: { secret?: string; alg?: string } = {}): string { + const alg = opts.alg ?? "HS256"; + const secret = opts.secret ?? JWT_SECRET; + const header = b64url(JSON.stringify({ alg, typ: "JWT" })); + const body = b64url(JSON.stringify(payload)); + const signature = b64url(createHmac("sha256", new TextEncoder().encode(secret)).update(`${header}.${body}`).digest()); + return `${header}.${body}.${signature}`; +} + +const now = () => Math.floor(Date.now() / 1000); +const validClaims = () => ({ + sub: "alice", + userId: 42, + email: "alice@example.com", + role: "REGULAR", + exp: now() + 3600, +}); + +describe("verifyToken", () => { + test("accepts a correctly-signed, unexpired token and returns its claims", () => { + const user = verifyToken(sign(validClaims())); + expect(user).not.toBeNull(); + expect(user).toEqual({ uid: 42, name: "alice", email: "alice@example.com", role: "REGULAR" }); + }); + + test("coerces a string userId claim to a number", () => { + const user = verifyToken(sign({ ...validClaims(), userId: "7" })); + expect(user?.uid).toBe(7); + }); + + test("rejects a token signed with a different secret", () => { + expect(verifyToken(sign(validClaims(), { secret: "not-the-real-secret" }))).toBeNull(); + }); + + test("rejects a tampered payload (signature no longer matches)", () => { + const token = sign(validClaims()); + const [h, , s] = token.split("."); + const forged = b64url(JSON.stringify({ ...validClaims(), userId: 999 })); + expect(verifyToken(`${h}.${forged}.${s}`)).toBeNull(); + }); + + test("rejects an expired token (beyond the 30s clock skew)", () => { + expect(verifyToken(sign({ ...validClaims(), exp: now() - 60 }))).toBeNull(); + }); + + test("accepts a just-expired token within the 30s clock skew", () => { + expect(verifyToken(sign({ ...validClaims(), exp: now() - 10 }))).not.toBeNull(); + }); + + test("rejects a non-HS256 algorithm (none / alg-confusion)", () => { + expect(verifyToken(sign(validClaims(), { alg: "none" }))).toBeNull(); + }); + + test("rejects tokens missing required exp / sub / userId claims", () => { + expect(verifyToken(sign({ sub: "alice", userId: 42 }))).toBeNull(); // no exp + expect(verifyToken(sign({ userId: 42, exp: now() + 60 }))).toBeNull(); // no sub + expect(verifyToken(sign({ sub: "alice", exp: now() + 60 }))).toBeNull(); // no userId + }); + + test("rejects malformed / empty tokens", () => { + expect(verifyToken(undefined)).toBeNull(); + expect(verifyToken("")).toBeNull(); + expect(verifyToken("not-a-jwt")).toBeNull(); + expect(verifyToken("only.two")).toBeNull(); + }); +}); diff --git a/agent-service/src/config/jwt.ts b/agent-service/src/config/jwt.ts new file mode 100644 index 00000000000..ec10dccdaba --- /dev/null +++ b/agent-service/src/config/jwt.ts @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { createHmac, timingSafeEqual } from "node:crypto"; +import { existsSync, readFileSync } from "node:fs"; +import { resolve } from "node:path"; +import { env } from "./env"; +import { createLogger } from "../logger"; + +const log = createLogger("Jwt"); + +// Token issuance lives in the Scala services (org.apache.texera.auth.JwtAuth): +// HS256 over the UTF-8 bytes of the secret, with a required `exp` and `sub` +// and a 30s allowed clock skew. This module mirrors the verification so the +// agent service can validate the same tokens without a gateway. +const CLOCK_SKEW_SECONDS = 30; + +// auth.conf default, used as the last-resort fallback when neither the env +// override nor the file is available (matches AuthConfig's literal default). +const AUTH_CONF_DEFAULT_SECRET = "8a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d"; + +/** Read the `256-bit-secret` default out of auth.conf, if the file is found. */ +function readSecretFromAuthConf(): string | undefined { + const candidates = [ + env.AUTH_CONF_PATH, // explicit override + "auth.conf", // bundled next to the app in the container image + "../common/config/src/main/resources/auth.conf", // repo layout in local dev + ].filter((p): p is string => typeof p === "string" && p.length > 0); + + for (const candidate of candidates) { + const path = resolve(process.cwd(), candidate); + if (!existsSync(path)) continue; + // The first quoted `256-bit-secret = "..."` is the literal default; the + // second line is the `${?AUTH_JWT_SECRET}` env override (handled below). + const match = readFileSync(path, "utf-8").match(/256-bit-secret\s*=\s*"([^"]*)"/); + if (match) { + log.info({ path }, "loaded JWT secret default from auth.conf"); + return match[1]; + } + } + log.warn("auth.conf not found; falling back to the built-in default JWT secret"); + return undefined; +} + +// Resolution order mirrors HOCON's `256-bit-secret = ""; 256-bit-secret +// = ${?AUTH_JWT_SECRET}`, and the `.toLowerCase()` normalization AuthConfig +// applies. ("random" is intentionally unsupported here — it cannot match across +// processes, so deployments share a fixed secret.) +export const JWT_SECRET: string = ( + env.AUTH_JWT_SECRET || + readSecretFromAuthConf() || + AUTH_CONF_DEFAULT_SECRET +).toLowerCase(); + +const SECRET_BYTES = new TextEncoder().encode(JWT_SECRET); + +export interface JwtUser { + uid: number; + name: string; + email: string; + role: string; +} + +function decodeSegment(segment: string): unknown { + return JSON.parse(Buffer.from(segment, "base64url").toString("utf-8")); +} + +/** + * Verify an HS256 JWT issued by the Scala services and return its claims, or + * null if the token is missing, malformed, mis-signed, expired, or uses a + * different algorithm. Mirrors org.apache.texera.auth.JwtParser.parseToken. + */ +export function verifyToken(token: string | undefined | null): JwtUser | null { + if (!token) return null; + try { + const parts = token.split("."); + if (parts.length !== 3) return null; + const [headerB64, payloadB64, signatureB64] = parts; + + const header = decodeSegment(headerB64) as { alg?: string }; + // Reject anything that is not HS256 (defends against "none" / alg-confusion). + if (header.alg !== "HS256") return null; + + const expected = createHmac("sha256", SECRET_BYTES).update(`${headerB64}.${payloadB64}`).digest(); + const actual = Buffer.from(signatureB64, "base64url"); + if (expected.length !== actual.length || !timingSafeEqual(expected, actual)) { + return null; + } + + const payload = decodeSegment(payloadB64) as { + exp?: number; + sub?: string; + userId?: number | string; + email?: string; + role?: string; + }; + + // `exp` and `sub` are required by JwtAuth.jwtConsumer. + if (typeof payload.exp !== "number") return null; + if (typeof payload.sub !== "string" || payload.sub.length === 0) return null; + if (payload.userId === undefined || payload.userId === null) return null; + + const nowSeconds = Math.floor(Date.now() / 1000); + if (nowSeconds > payload.exp + CLOCK_SKEW_SECONDS) return null; + + return { + uid: Number(payload.userId), + name: payload.sub, + email: typeof payload.email === "string" ? payload.email : "", + role: typeof payload.role === "string" ? payload.role : "REGULAR", + }; + } catch { + return null; + } +} diff --git a/agent-service/src/server.test.ts b/agent-service/src/server.test.ts index 5eb15b6e248..eeff98fe742 100644 --- a/agent-service/src/server.test.ts +++ b/agent-service/src/server.test.ts @@ -18,8 +18,10 @@ */ import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { createHmac } from "node:crypto"; import { buildApp, _resetAgentStoreForTests } from "./server"; import { env } from "./config/env"; +import { JWT_SECRET } from "./config/jwt"; const API = env.API_PREFIX; const app = buildApp(); @@ -28,11 +30,38 @@ function url(path: string): string { return `http://localhost${path}`; } +// Mint a valid HS256 token (same shape JwtAuth issues) so requests pass the +// agent-service auth guard; tests that probe the guard itself omit/forge it. +function b64url(input: string | Buffer): string { + return Buffer.from(input).toString("base64url"); +} +function signToken(claims: Record = {}): string { + const header = b64url(JSON.stringify({ alg: "HS256", typ: "JWT" })); + const payload = b64url( + JSON.stringify({ + sub: "tester", + userId: 1, + email: "tester@example.com", + role: "REGULAR", + exp: Math.floor(Date.now() / 1000) + 3600, + ...claims, + }) + ); + const signature = b64url( + createHmac("sha256", new TextEncoder().encode(JWT_SECRET)).update(`${header}.${payload}`).digest() + ); + return `${header}.${payload}.${signature}`; +} +const VALID_TOKEN = signToken(); +function authHeaders(extra: Record = {}): Record { + return { Authorization: `Bearer ${VALID_TOKEN}`, ...extra }; +} + async function postJson(path: string, body: unknown): Promise { return app.handle( new Request(url(path), { method: "POST", - headers: { "Content-Type": "application/json" }, + headers: authHeaders({ "Content-Type": "application/json" }), body: JSON.stringify(body), }) ); @@ -42,18 +71,18 @@ async function patchJson(path: string, body: unknown): Promise { return app.handle( new Request(url(path), { method: "PATCH", - headers: { "Content-Type": "application/json" }, + headers: authHeaders({ "Content-Type": "application/json" }), body: JSON.stringify(body), }) ); } async function getJson(path: string): Promise { - return app.handle(new Request(url(path))); + return app.handle(new Request(url(path), { headers: authHeaders() })); } async function del(path: string): Promise { - return app.handle(new Request(url(path), { method: "DELETE" })); + return app.handle(new Request(url(path), { method: "DELETE", headers: authHeaders() })); } async function readJson(res: Response): Promise { @@ -72,6 +101,44 @@ describe(`GET ${API}/healthcheck`, () => { expect(body.status).toBe("ok"); expect(typeof body.timestamp).toBe("string"); }); + + test("does not require a token (readiness probe)", async () => { + const res = await app.handle(new Request(url(`${API}/healthcheck`))); + expect(res.status).toBe(200); + }); +}); + +describe("auth guard on agent routes", () => { + test("rejects a request with no Authorization header (401)", async () => { + const res = await app.handle(new Request(url(`${API}/agents`))); + expect(res.status).toBe(401); + expect(await readJson<{ error: string }>(res)).toEqual({ error: "Unauthorized" }); + }); + + test("rejects an invalid / mis-signed token (401)", async () => { + const res = await app.handle( + new Request(url(`${API}/agents`), { headers: { Authorization: "Bearer not-a-valid-jwt" } }) + ); + expect(res.status).toBe(401); + }); + + test("rejects an expired token (401)", async () => { + const expired = signToken({ exp: Math.floor(Date.now() / 1000) - 3600 }); + const res = await app.handle( + new Request(url(`${API}/agents`), { headers: { Authorization: `Bearer ${expired}` } }) + ); + expect(res.status).toBe(401); + }); + + test("accepts a valid token", async () => { + const res = await app.handle(new Request(url(`${API}/agents`), { headers: authHeaders() })); + expect(res.status).toBe(200); + }); + + test("guards the models endpoint too", async () => { + const res = await app.handle(new Request(url(`${API}/agents/models`))); + expect(res.status).toBe(401); + }); }); describe(`POST ${API}/agents`, () => { diff --git a/agent-service/src/server.ts b/agent-service/src/server.ts index 2a328bb7c1d..f208a1d877b 100644 --- a/agent-service/src/server.ts +++ b/agent-service/src/server.ts @@ -23,7 +23,7 @@ import { createOpenAI } from "@ai-sdk/openai"; import { TexeraAgent } from "./agent/texera-agent"; import { getVisibleResultHeaders } from "./agent/tools/tools-utility"; import { getBackendConfig } from "./api/backend-api"; -import { extractUserFromToken, validateToken } from "./api/auth-api"; +import { extractUserFromToken, validateToken, verifyToken } from "./api/auth-api"; import { retrieveWorkflow } from "./api/workflow-api"; import { WorkflowSystemMetadata } from "./agent/util/workflow-system-metadata"; import { env } from "./config/env"; @@ -162,6 +162,17 @@ const agentsRouter = new Elysia({ prefix: "/agents" }) set.status = 500; return { error: errorMessage || "Internal server error" }; }) + // Authenticate every agent request by verifying the Bearer JWT ourselves + // (defense in depth — the gateway ext_authz also checks it, but this also + // covers direct access, e.g. bare-metal dev). The WebSocket route is guarded + // separately in its open() handler via the access-token query param. + .onBeforeHandle(({ request, set }) => { + const token = (request.headers.get("authorization") ?? "").replace(/^Bearer\s+/i, "").trim(); + if (!verifyToken(token)) { + set.status = 401; + return { error: "Unauthorized" }; + } + }) .get("/", () => { const agentList = Array.from(agentStore.entries()).map(([id, agent]) => getAgentInfo(id, agent)); return { agents: agentList }; @@ -509,6 +520,15 @@ export function buildApp() { ) .ws(`${env.API_PREFIX}/agents/:id/react`, { open(ws) { + // Browsers can't set headers on a WebSocket, so the JWT arrives as the + // access-token query param (same convention as the workflow WS). + const token = (ws.data as any).query?.["access-token"]; + if (!verifyToken(token)) { + ws.send(JSON.stringify({ type: "error", error: "Unauthorized" })); + ws.close(); + return; + } + const agentId = (ws.data as any).params?.id; wsLog.info({ agentId }, "client connected"); diff --git a/bin/agent-service.dockerfile b/bin/agent-service.dockerfile index 7edf38287aa..5722644735a 100644 --- a/bin/agent-service.dockerfile +++ b/bin/agent-service.dockerfile @@ -35,6 +35,10 @@ RUN bun install --frozen-lockfile --production COPY agent-service/src ./src COPY agent-service/tsconfig.json ./ +# Shared JWT secret default (HS256) used to verify Bearer tokens. The effective +# secret is AUTH_JWT_SECRET when set; otherwise this file's default is used. +COPY common/config/src/main/resources/auth.conf ./auth.conf + COPY agent-service/LICENSE-binary ./LICENSE COPY NOTICE ./NOTICE COPY DISCLAIMER ./DISCLAIMER diff --git a/bin/k8s/templates/agent-service-deployment.yaml b/bin/k8s/templates/agent-service-deployment.yaml index 98a38f2408a..c7846732f94 100644 --- a/bin/k8s/templates/agent-service-deployment.yaml +++ b/bin/k8s/templates/agent-service-deployment.yaml @@ -61,6 +61,14 @@ spec: secretKeyRef: name: {{ .Release.Name }}-agent-service-secret key: litellm-master-key + # Shared JWT secret used to verify Bearer tokens. Sourced from the + # same texeraEnvVars entry the Scala services use, so they agree. + {{- range .Values.texeraEnvVars }} + {{- if eq .name "AUTH_JWT_SECRET" }} + - name: {{ .name }} + value: "{{ .value }}" + {{- end }} + {{- end }} # The service loads operator metadata from the dashboard service on # startup, so gate readiness on its health endpoint before the gateway # routes traffic here. /api/healthcheck needs no auth. diff --git a/bin/k8s/templates/agent-service-security-policy.yaml b/bin/k8s/templates/agent-service-security-policy.yaml new file mode 100644 index 00000000000..b88a31b6dcb --- /dev/null +++ b/bin/k8s/templates/agent-service-security-policy.yaml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Authenticates every /api/agents request through the access-control-service +# before it reaches the agent service (mirrors the dynamic-routes ext-auth). +# The access-control-service verifies the JWT and requires a REGULAR/ADMIN +# role, then injects the trusted x-user-* identity headers. +{{- if .Values.agentService.enabled }} +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: SecurityPolicy +metadata: + name: {{ .Release.Name }}-agent-ext-auth + namespace: {{ .Release.Namespace }} +spec: + targetRefs: + - group: gateway.networking.k8s.io + kind: HTTPRoute + name: {{ .Release.Name }}-agent-service-route + extAuth: + http: + backendRefs: + - name: {{ .Values.accessControlService.name }}-svc + port: {{ .Values.accessControlService.service.port }} + path: /api/auth + headersToBackend: + - x-user-id + - x-user-name + - x-user-email +{{- end }} diff --git a/bin/single-node/nginx.conf b/bin/single-node/nginx.conf index 70947c00792..52091137a00 100644 --- a/bin/single-node/nginx.conf +++ b/bin/single-node/nginx.conf @@ -24,6 +24,13 @@ http { # Suppress per-request access logs; errors still go to error_log. access_log off; + # Upstream for the agent ext_authz subrequest. Declaring it as an upstream + # lets the variable proxy_pass below ($request_uri) resolve the name at + # startup, without needing a separate runtime resolver. + upstream access_control_authz { + server access-control-service:9096; + } + server { listen 8080; @@ -59,7 +66,10 @@ http { # LLM traffic (chat + model list) is handled by the agent-service, which # talks to LiteLLM directly; /api/agents below already routes there. + # Each request is first authenticated by the access-control-service + # (ext_authz) via the auth_request subrequest below. location /api/agents { + auth_request /_agent_auth; proxy_pass http://agent-service:3001; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; @@ -70,6 +80,18 @@ http { proxy_send_timeout 1d; } + # Internal ext_authz subrequest: the access-control-service authorizes + # /api/agents by JWT. $request_uri preserves the original path so its + # /api/agents branch matches (same shape Envoy produces); the + # Authorization header and the WS access-token query ride along. + location = /_agent_auth { + internal; + proxy_pass http://access_control_authz/api/auth$request_uri; + proxy_pass_request_body off; + proxy_set_header Content-Length ""; + proxy_set_header X-Original-URI $request_uri; + } + location /api/ { proxy_pass http://dashboard-service:8080; proxy_set_header Host $host; diff --git a/frontend/src/app/workspace/service/agent/agent.service.ts b/frontend/src/app/workspace/service/agent/agent.service.ts index 2c922f5e085..8e17fab6fe6 100644 --- a/frontend/src/app/workspace/service/agent/agent.service.ts +++ b/frontend/src/app/workspace/service/agent/agent.service.ts @@ -240,6 +240,12 @@ export class AgentService { */ private agentHeaders(agentId?: string): { headers: HttpHeaders } { let headers = new HttpHeaders(); + // The access-control-service authorizes every agent request by JWT, so + // attach the Bearer token to all agent-service calls. + const token = AuthService.getAccessToken(); + if (token) { + headers = headers.set("Authorization", `Bearer ${token}`); + } if (agentId) { const wid = this.agentStateTracking.get(agentId)?.workflowId; if (wid !== undefined) { @@ -408,9 +414,12 @@ export class AgentService { * Start WebSocket connection for real-time ReActSteps updates */ private startStatePolling(agentId: string, tracking: AgentStateTracking): void { - // Build WebSocket URL + // Build WebSocket URL. Browsers can't set headers on a WebSocket, so the + // JWT is passed as the access-token query param (same as the workflow WS). const wsProtocol = window.location.protocol === "https:" ? "wss:" : "ws:"; - const wsUrl = `${wsProtocol}//${window.location.host}${this.AGENT_API_BASE}/agents/${agentId}/react`; + const token = AuthService.getAccessToken(); + const tokenParam = token ? `?access-token=${encodeURIComponent(token)}` : ""; + const wsUrl = `${wsProtocol}//${window.location.host}${this.AGENT_API_BASE}/agents/${agentId}/react${tokenParam}`; const ws = new WebSocket(wsUrl); tracking.websocket = ws; @@ -712,7 +721,7 @@ export class AgentService { } } - return this.http.post(`${this.AGENT_API_BASE}/agents`, body).pipe( + return this.http.post(`${this.AGENT_API_BASE}/agents`, body, this.agentHeaders()).pipe( map(response => { const agentInfo: AgentInfo = { id: response.id, @@ -792,7 +801,7 @@ export class AgentService { * Also syncs local cache with backend - removes any stale agents that no longer exist on the backend. */ public getAllAgents(): Observable { - return this.http.get(`${this.AGENT_API_BASE}/agents`).pipe( + return this.http.get(`${this.AGENT_API_BASE}/agents`, this.agentHeaders()).pipe( map(response => { const agents = response.agents.map(a => ({ id: a.id, @@ -864,21 +873,23 @@ export class AgentService { */ public fetchModelTypes(): Observable { if (!this.modelTypes$) { - this.modelTypes$ = this.http.get(`${AppSettings.getApiEndpoint()}/agents/models`).pipe( - map(response => - response.data.map((model: LiteLLMModel) => ({ - id: model.id, - name: this.formatModelName(model.id), - description: `Model: ${model.id}`, - icon: "robot", - })) - ), - catchError((error: unknown) => { - console.error("Failed to fetch models from API:", error); - return of([]); - }), - shareReplay(1) - ); + this.modelTypes$ = this.http + .get(`${AppSettings.getApiEndpoint()}/agents/models`, this.agentHeaders()) + .pipe( + map(response => + response.data.map((model: LiteLLMModel) => ({ + id: model.id, + name: this.formatModelName(model.id), + description: `Model: ${model.id}`, + icon: "robot", + })) + ), + catchError((error: unknown) => { + console.error("Failed to fetch models from API:", error); + return of([]); + }), + shareReplay(1) + ); } return this.modelTypes$; } @@ -1036,7 +1047,7 @@ export class AgentService { * The backend broadcasts headChange + visible steps via WebSocket to all clients. */ public checkoutStep(agentId: string, stepId: string): Observable { - return this.http.post(`${this.AGENT_API_BASE}/agents/${agentId}/checkout`, { stepId }); + return this.http.post(`${this.AGENT_API_BASE}/agents/${agentId}/checkout`, { stepId }, this.agentHeaders(agentId)); } /**