From cf64549290913b55eccf82a6f1034bd8119395ec Mon Sep 17 00:00:00 2001
From: Bob Bai <bobbai0509@gmail.com>
Date: Sun, 7 Jun 2026 21:00:52 -0700
Subject: [PATCH 1/4] refactor(agent): call LiteLLM directly, remove
 access-control LLM proxy

The LiteLLM HTTP proxy in access-control-service existed so the browser
could reach LiteLLM without holding the master key. The agent-service is
now a trusted backend that holds the key itself, so the proxy hop is
redundant.

- agent-service: build the OpenAI client against LITELLM_BASE_URL with the
  master key; serve the model list at GET /api/agents/models from LiteLLM.
- access-control-service: delete LiteLLMProxyResource / LiteLLMModelsResource,
  their auth spec, and the now-dead RolesAllowedDynamicFeature registration;
  drop LLMConfig and llm.conf.
- frontend: fetch models from /api/agents/models.
- routing/deploy: drop the /api/models and /api/chat proxy routes (nginx,
  k8s gateway, dev proxy); point the agent-service deployment at LiteLLM
  with the litellm-master-key secret; update the enable-LLM guide.
---
 .../texera/service/AccessControlService.scala |  13 +-
 .../activity/UserActivityEventListener.scala  |   4 +-
 .../resource/AccessControlResource.scala      | 159 +---------
 .../service/AccessControlServiceRunSpec.scala |   3 -
 .../resource/LiteLLMProxyAuthSpec.scala       | 299 ------------------
 agent-service/.env.example                    |  12 +-
 agent-service/src/api/backend-api.ts          |   4 +-
 agent-service/src/config/env.ts               |   6 +-
 agent-service/src/server.ts                   |  30 +-
 .../access-control-service-deployment.yaml    |  10 -
 .../templates/agent-service-deployment.yaml   |  12 +-
 bin/k8s/templates/agent-service-secret.yaml   |   8 +-
 bin/k8s/templates/gateway-routes.yaml         |  10 -
 bin/k8s/values-development.yaml               |   4 -
 bin/k8s/values.yaml                           |   4 -
 bin/single-node/.env                          |   6 +-
 bin/single-node/docker-compose.yml            |   2 +-
 bin/single-node/nginx.conf                    |  15 +-
 common/config/src/main/resources/llm.conf     |  27 --
 .../org/apache/texera/config/LLMConfig.scala  |  29 --
 docs/tutorials/guide-to-enable-llm-agent.md   |  21 +-
 frontend/proxy.config.json                    |  10 -
 .../workspace/service/agent/agent.service.ts  |   2 +-
 23 files changed, 66 insertions(+), 624 deletions(-)
 delete mode 100644 access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala
 delete mode 100644 common/config/src/main/resources/llm.conf
 delete mode 100644 common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala

diff --git a/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala b/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala
index e262b80900d..bcf9d19af54 100644
--- a/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala
+++ b/access-control-service/src/main/scala/org/apache/texera/service/AccessControlService.scala
@@ -32,14 +32,8 @@ import org.apache.texera.auth.{
 }
 import org.apache.texera.dao.SqlServer
 import org.apache.texera.service.activity.UserActivityEventListener
-import org.apache.texera.service.resource.{
-  AccessControlResource,
-  HealthCheckResource,
-  LiteLLMModelsResource,
-  LiteLLMProxyResource
-}
+import org.apache.texera.service.resource.{AccessControlResource, HealthCheckResource}
 import org.eclipse.jetty.server.session.SessionHandler
-import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature
 import java.nio.file.Path
 
 class AccessControlService extends Application[AccessControlServiceConfiguration] with LazyLogging {
@@ -73,8 +67,6 @@ class AccessControlService extends Application[AccessControlServiceConfiguration
 
     environment.jersey.register(classOf[HealthCheckResource])
     environment.jersey.register(classOf[AccessControlResource])
-    environment.jersey.register(classOf[LiteLLMProxyResource])
-    environment.jersey.register(classOf[LiteLLMModelsResource])
 
     // Register JWT authentication filter
     environment.jersey.register(new AuthDynamicFeature(classOf[JwtAuthFilter]))
@@ -85,9 +77,6 @@ class AccessControlService extends Application[AccessControlServiceConfiguration
       new io.dropwizard.auth.AuthValueFactoryProvider.Binder(classOf[SessionUser])
     )
 
-    // Required for @RolesAllowed on resources to be enforced.
-    environment.jersey.register(classOf[RolesAllowedDynamicFeature])
-
     // Record USER_LAST_ACTIVE_TIME on every matched, completed request.
     // Lives only in this service because authenticated client sessions
     // contact access-control-service often enough to capture activity
diff --git a/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala b/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala
index e5fa785f53e..e29167ffd15 100644
--- a/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala
+++ b/access-control-service/src/main/scala/org/apache/texera/service/activity/UserActivityEventListener.scala
@@ -43,8 +43,8 @@ import org.glassfish.jersey.server.monitoring.{
   * Lives in access-control-service because USER_LAST_ACTIVE_TIME is a
   * user-management concern; the assumption is that any authenticated
   * client session contacts this service often enough (UI navigation,
-  * permission checks, LiteLLM proxy) to capture activity with high
-  * recall, so other services do not need to mirror this listener.
+  * permission checks) to capture activity with high recall, so other
+  * services do not need to mirror this listener.
   */
 @Provider
 class UserActivityEventListener(track: Integer => Unit = UserActivityTracker.markActive)
diff --git a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala
index 0c90a6ce31f..8a225ad4c61 100644
--- a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala
+++ b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala
@@ -20,14 +20,13 @@ package org.apache.texera.service.resource
 import com.fasterxml.jackson.databind.ObjectMapper
 import com.fasterxml.jackson.module.scala.DefaultScalaModule
 import com.typesafe.scalalogging.LazyLogging
-import jakarta.annotation.security.{PermitAll, RolesAllowed}
-import jakarta.ws.rs.client.{Client, ClientBuilder, Entity}
+import jakarta.annotation.security.PermitAll
 import jakarta.ws.rs.core._
-import jakarta.ws.rs.{Consumes, DELETE, GET, POST, Path, Produces}
+import jakarta.ws.rs.{DELETE, GET, POST, Path, Produces}
 import org.apache.texera.auth.JwtParser.parseToken
 import org.apache.texera.auth.SessionUser
 import org.apache.texera.auth.util.{ComputingUnitAccess, HeaderField}
-import org.apache.texera.config.{GuiConfig, KubernetesConfig, LLMConfig}
+import org.apache.texera.config.KubernetesConfig
 import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum
 
 import java.net.URLDecoder
@@ -242,155 +241,3 @@ class AccessControlResource extends LazyLogging {
     AccessControlResource.authorize(uriInfo, headers)
   }
 }
-
-// Forwards chat completions to LiteLLM with the server's master key, so
-// only authenticated users may call it.
-@Path("/chat")
-@RolesAllowed(Array("REGULAR", "ADMIN"))
-@Produces(Array(MediaType.APPLICATION_JSON))
-@Consumes(Array(MediaType.APPLICATION_JSON))
-class LiteLLMProxyResource(
-    copilotEnabled: Boolean,
-    litellmBaseUrl: String,
-    litellmApiKey: String
-) extends LazyLogging {
-
-  // No-arg constructor for Jersey reflection. Tests use the param-ful form.
-  def this() =
-    this(
-      GuiConfig.guiWorkflowWorkspaceCopilotEnabled,
-      LLMConfig.baseUrl,
-      LLMConfig.masterKey
-    )
-
-  private val client: Client = ClientBuilder.newClient()
-
-  @POST
-  @Path("/{path:.*}")
-  def proxyPost(
-      @Context uriInfo: UriInfo,
-      @Context headers: HttpHeaders,
-      body: String
-  ): Response = {
-    if (!copilotEnabled) {
-      return Response
-        .status(Response.Status.FORBIDDEN)
-        .entity(LiteLLMProxyResource.CopilotDisabledBody)
-        .build()
-    }
-
-    // uriInfo.getPath returns "chat/completions" for /api/chat/completions
-    // We want to forward as "/chat/completions" to LiteLLM
-    val fullPath = uriInfo.getPath
-    val targetUrl = s"$litellmBaseUrl/$fullPath"
-
-    logger.info(s"Proxying POST request to LiteLLM: $targetUrl")
-
-    try {
-      val requestBuilder = client
-        .target(targetUrl)
-        .request(MediaType.APPLICATION_JSON)
-        .header("Authorization", s"Bearer $litellmApiKey")
-
-      // Forward other relevant headers from the original request
-      headers.getRequestHeaders.asScala.foreach {
-        case (key, values)
-            if !key.equalsIgnoreCase("Authorization") &&
-              !key.equalsIgnoreCase("Host") &&
-              !key.equalsIgnoreCase("Content-Length") =>
-          values.asScala.foreach(value => requestBuilder.header(key, value))
-        case _ => // Skip Authorization, Host, and Content-Length headers
-      }
-
-      val response = requestBuilder.post(Entity.json(body))
-
-      // Build response with same status and body from LiteLLM
-      val responseBody = response.readEntity(classOf[String])
-      val responseBuilder = Response
-        .status(response.getStatus)
-        .entity(responseBody)
-
-      // Forward response headers
-      response.getHeaders.asScala.foreach {
-        case (key, values) =>
-          values.asScala.foreach(value => responseBuilder.header(key, value))
-      }
-
-      responseBuilder.build()
-    } catch {
-      case e: Exception =>
-        logger.error(s"Error proxying request to LiteLLM: ${e.getMessage}", e)
-        Response
-          .status(Response.Status.BAD_GATEWAY)
-          .entity(s"""{"error": "Failed to proxy request to LiteLLM: ${e.getMessage}"}""")
-          .build()
-    }
-  }
-}
-
-object LiteLLMProxyResource {
-  val CopilotDisabledBody: String = """{"error": "Copilot feature is disabled"}"""
-}
-
-@Path("/models")
-@RolesAllowed(Array("REGULAR", "ADMIN"))
-@Produces(Array(MediaType.APPLICATION_JSON))
-class LiteLLMModelsResource(
-    copilotEnabled: Boolean,
-    litellmBaseUrl: String,
-    litellmApiKey: String
-) extends LazyLogging {
-
-  // No-arg constructor for Jersey reflection. Tests use the param-ful form.
-  def this() =
-    this(
-      GuiConfig.guiWorkflowWorkspaceCopilotEnabled,
-      LLMConfig.baseUrl,
-      LLMConfig.masterKey
-    )
-
-  private val client: Client = ClientBuilder.newClient()
-
-  @GET
-  def getModels: Response = {
-    if (!copilotEnabled) {
-      return Response
-        .status(Response.Status.FORBIDDEN)
-        .entity(LiteLLMProxyResource.CopilotDisabledBody)
-        .build()
-    }
-
-    val targetUrl = s"$litellmBaseUrl/models"
-
-    logger.info(s"Fetching models from LiteLLM: $targetUrl")
-
-    try {
-      val response = client
-        .target(targetUrl)
-        .request(MediaType.APPLICATION_JSON)
-        .header("Authorization", s"Bearer $litellmApiKey")
-        .get()
-
-      // Build response with same status and body from LiteLLM
-      val responseBody = response.readEntity(classOf[String])
-      val responseBuilder = Response
-        .status(response.getStatus)
-        .entity(responseBody)
-
-      // Forward response headers
-      response.getHeaders.asScala.foreach {
-        case (key, values) =>
-          values.asScala.foreach(value => responseBuilder.header(key, value))
-      }
-
-      responseBuilder.build()
-    } catch {
-      case e: Exception =>
-        logger.error(s"Error fetching models from LiteLLM: ${e.getMessage}", e)
-        Response
-          .status(Response.Status.BAD_GATEWAY)
-          .entity(s"""{"error": "Failed to fetch models from LiteLLM: ${e.getMessage}"}""")
-          .build()
-    }
-  }
-}
diff --git a/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala b/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala
index 2460d18b456..96bfd104d9a 100644
--- a/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala
+++ b/access-control-service/src/test/scala/org/apache/texera/service/AccessControlServiceRunSpec.scala
@@ -25,7 +25,6 @@ import io.dropwizard.jetty.MutableServletContextHandler
 import io.dropwizard.jetty.setup.ServletEnvironment
 import org.apache.texera.auth.UnauthorizedExceptionMapper
 import org.apache.texera.service.activity.UserActivityEventListener
-import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature
 import org.mockito.ArgumentMatchers.isA
 import org.mockito.Mockito.{mock, verify, when}
 import org.scalatest.flatspec.AnyFlatSpec
@@ -47,8 +46,6 @@ class AccessControlServiceRunSpec extends AnyFlatSpec with Matchers {
 
     verify(jersey).register(isA(classOf[UserActivityEventListener]))
     verify(jersey).register(classOf[UnauthorizedExceptionMapper])
-    // Without this feature Jersey ignores @RolesAllowed on the LiteLLM proxies.
-    verify(jersey).register(classOf[RolesAllowedDynamicFeature])
     verify(jersey).setUrlPattern("/api/*")
   }
 }
diff --git a/access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala b/access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala
deleted file mode 100644
index 4d8d271c7d6..00000000000
--- a/access-control-service/src/test/scala/org/apache/texera/service/resource/LiteLLMProxyAuthSpec.scala
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.texera.service.resource
-
-import com.fasterxml.jackson.databind.ObjectMapper
-import com.fasterxml.jackson.module.scala.DefaultScalaModule
-import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer}
-import io.dropwizard.jackson.Jackson
-import io.dropwizard.testing.junit5.ResourceExtension
-import jakarta.ws.rs.client.Entity
-import jakarta.ws.rs.core.MediaType
-import org.apache.texera.auth.{JwtAuth, JwtAuthFilter, UnauthorizedExceptionMapper}
-import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
-import org.apache.texera.dao.jooq.generated.tables.pojos.User
-import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature
-import org.scalatest.BeforeAndAfterAll
-import org.scalatest.flatspec.AnyFlatSpec
-import org.scalatest.matchers.should.Matchers
-
-import java.net.InetSocketAddress
-import java.nio.charset.StandardCharsets
-
-// Wires the LiteLLM proxy resources through the same Jersey auth pipeline
-// production uses and fires HTTP requests with no / wrong-role / right-role
-// Bearer tokens. The @RolesAllowed annotations are only enforced when
-// AccessControlService registers RolesAllowedDynamicFeature; this spec is
-// the regression guard that the annotation, the registration, and the
-// JwtAuthFilter priority continue to compose into the expected status codes.
-class LiteLLMProxyAuthSpec extends AnyFlatSpec with Matchers with BeforeAndAfterAll {
-
-  private val testMapper: ObjectMapper =
-    Jackson.newObjectMapper().registerModule(DefaultScalaModule)
-
-  // Inject copilotEnabled = true and a guaranteed-unreachable upstream so
-  // requests that pass the auth + role gates fall straight into the
-  // resource's "proxy attempt failed" branch — that 502 confirms the auth
-  // pipeline didn't short-circuit before the resource body ran.
-  private val unreachableLiteLLM = "http://127.0.0.1:1"
-
-  private val resources: ResourceExtension = ResourceExtension
-    .builder()
-    .setMapper(testMapper)
-    .addProvider(classOf[JwtAuthFilter])
-    .addProvider(classOf[UnauthorizedExceptionMapper])
-    .addProvider(classOf[RolesAllowedDynamicFeature])
-    .addResource(
-      new LiteLLMProxyResource(
-        copilotEnabled = true,
-        litellmBaseUrl = unreachableLiteLLM,
-        litellmApiKey = "test"
-      )
-    )
-    .addResource(
-      new LiteLLMModelsResource(
-        copilotEnabled = true,
-        litellmBaseUrl = unreachableLiteLLM,
-        litellmApiKey = "test"
-      )
-    )
-    .build()
-
-  // Second resource extension with copilotEnabled = false, used to exercise
-  // the resource's early-exit branch that returns "Copilot feature is disabled".
-  private val resourcesCopilotDisabled: ResourceExtension = ResourceExtension
-    .builder()
-    .setMapper(testMapper)
-    .addProvider(classOf[JwtAuthFilter])
-    .addProvider(classOf[UnauthorizedExceptionMapper])
-    .addProvider(classOf[RolesAllowedDynamicFeature])
-    .addResource(
-      new LiteLLMProxyResource(
-        copilotEnabled = false,
-        litellmBaseUrl = unreachableLiteLLM,
-        litellmApiKey = "test"
-      )
-    )
-    .addResource(
-      new LiteLLMModelsResource(
-        copilotEnabled = false,
-        litellmBaseUrl = unreachableLiteLLM,
-        litellmApiKey = "test"
-      )
-    )
-    .build()
-
-  // An in-process HTTP server stands in for LiteLLM so the resources' success
-  // path (header / body / status forwarding) is exercised end-to-end without
-  // a network dependency. Bound to port 0 to pick any free port.
-  private val mockChatBody = """{"id":"mock-chat","choices":[{"message":{"content":"hi"}}]}"""
-  private val mockModelsBody = """{"data":[{"id":"mock-gpt"}]}"""
-
-  private val mockLiteLLM: HttpServer = HttpServer.create(new InetSocketAddress(0), 0)
-  mockLiteLLM.createContext("/chat/completions", respondWith(200, mockChatBody))
-  mockLiteLLM.createContext("/models", respondWith(200, mockModelsBody))
-
-  private def respondWith(status: Int, body: String): HttpHandler =
-    (exchange: HttpExchange) => {
-      val bytes = body.getBytes(StandardCharsets.UTF_8)
-      exchange.getResponseHeaders.add("Content-Type", MediaType.APPLICATION_JSON)
-      exchange.sendResponseHeaders(status, bytes.length.toLong)
-      val os = exchange.getResponseBody
-      try os.write(bytes)
-      finally os.close()
-    }
-
-  private def mockBaseUrl: String = s"http://127.0.0.1:${mockLiteLLM.getAddress.getPort}"
-
-  // Third extension: copilot on, upstream reachable. Resource is built lazily
-  // because litellmBaseUrl depends on the mock server's bound port.
-  private lazy val resourcesMockLiteLLM: ResourceExtension = ResourceExtension
-    .builder()
-    .setMapper(testMapper)
-    .addProvider(classOf[JwtAuthFilter])
-    .addProvider(classOf[UnauthorizedExceptionMapper])
-    .addProvider(classOf[RolesAllowedDynamicFeature])
-    .addResource(
-      new LiteLLMProxyResource(
-        copilotEnabled = true,
-        litellmBaseUrl = mockBaseUrl,
-        litellmApiKey = "test"
-      )
-    )
-    .addResource(
-      new LiteLLMModelsResource(
-        copilotEnabled = true,
-        litellmBaseUrl = mockBaseUrl,
-        litellmApiKey = "test"
-      )
-    )
-    .build()
-
-  override protected def beforeAll(): Unit = {
-    mockLiteLLM.start()
-    resources.before()
-    resourcesCopilotDisabled.before()
-    resourcesMockLiteLLM.before()
-  }
-  override protected def afterAll(): Unit = {
-    resourcesMockLiteLLM.after()
-    resourcesCopilotDisabled.after()
-    resources.after()
-    mockLiteLLM.stop(0)
-  }
-
-  private def token(role: UserRoleEnum): String = {
-    val u = new User()
-    u.setUid(1)
-    u.setName("test")
-    u.setEmail("test@example.com")
-    u.setGoogleId(null)
-    u.setRole(role)
-    JwtAuth.jwtToken(JwtAuth.jwtClaims(u, expireInDays = 1))
-  }
-
-  private val chatBody = """{"model":"gpt-4o-mini","messages":[]}"""
-
-  "POST /chat/completions without an Authorization header" should "return 401 with a Bearer challenge" in {
-    val response = resources
-      .target("/chat/completions")
-      .request(MediaType.APPLICATION_JSON)
-      .post(Entity.json(chatBody))
-    response.getStatus shouldBe 401
-    response.getHeaderString("WWW-Authenticate") shouldBe JwtAuthFilter.BearerChallenge
-  }
-
-  it should "return 403 with an INACTIVE-role token" in {
-    val response = resources
-      .target("/chat/completions")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.INACTIVE)}")
-      .post(Entity.json(chatBody))
-    response.getStatus shouldBe 403
-  }
-
-  it should "return 502 when the upstream LiteLLM call fails" in {
-    // Exercises the resource's catch branch via a connect-refused upstream.
-    val response = resources
-      .target("/chat/completions")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.REGULAR)}")
-      .post(Entity.json(chatBody))
-    response.getStatus shouldBe 502
-  }
-
-  it should "return the resource's Copilot-disabled response when copilot is off" in {
-    // 403 alone is ambiguous (could be from RolesAllowedDynamicFeature);
-    // matching the entity to the same constant the resource emits proves the
-    // role check passed and the resource's own early-exit branch fired.
-    val response = resourcesCopilotDisabled
-      .target("/chat/completions")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.REGULAR)}")
-      .post(Entity.json(chatBody))
-    response.getStatus shouldBe 403
-    response.readEntity(classOf[String]) shouldBe LiteLLMProxyResource.CopilotDisabledBody
-  }
-
-  "GET /models without an Authorization header" should "return 401 with a Bearer challenge" in {
-    val response = resources.target("/models").request(MediaType.APPLICATION_JSON).get()
-    response.getStatus shouldBe 401
-    response.getHeaderString("WWW-Authenticate") shouldBe JwtAuthFilter.BearerChallenge
-  }
-
-  it should "return 403 with an INACTIVE-role token" in {
-    val response = resources
-      .target("/models")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.INACTIVE)}")
-      .get()
-    response.getStatus shouldBe 403
-  }
-
-  it should "return 502 when the upstream LiteLLM call fails" in {
-    val response = resources
-      .target("/models")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.ADMIN)}")
-      .get()
-    response.getStatus shouldBe 502
-  }
-
-  it should "return the resource's Copilot-disabled response when copilot is off" in {
-    val response = resourcesCopilotDisabled
-      .target("/models")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.ADMIN)}")
-      .get()
-    response.getStatus shouldBe 403
-    response.readEntity(classOf[String]) shouldBe LiteLLMProxyResource.CopilotDisabledBody
-  }
-
-  "POST /chat/completions" should "forward the upstream response when copilot is on and upstream is reachable" in {
-    val response = resourcesMockLiteLLM
-      .target("/chat/completions")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.REGULAR)}")
-      .post(Entity.json(chatBody))
-    response.getStatus shouldBe 200
-    response.readEntity(classOf[String]) shouldBe mockChatBody
-  }
-
-  "GET /models" should "forward the upstream response when copilot is on and upstream is reachable" in {
-    val response = resourcesMockLiteLLM
-      .target("/models")
-      .request(MediaType.APPLICATION_JSON)
-      .header("Authorization", s"Bearer ${token(UserRoleEnum.ADMIN)}")
-      .get()
-    response.getStatus shouldBe 200
-    response.readEntity(classOf[String]) shouldBe mockModelsBody
-  }
-
-  // Regression guard for the no-arg auxiliary constructor that Jersey
-  // reflection picks at production startup. Jersey resolves constructors in
-  // descending parameter count and skips any whose parameters are not
-  // @Context / HK2 injectable; LiteLLMProxyResource's 3-arg ctor takes plain
-  // Boolean / String values, so the no-arg form must exist and be picked.
-  // addResource(classOf[...]) (vs. addResource(new ...)) exercises that path.
-  "Jersey reflection" should "instantiate both LiteLLM resources via their no-arg constructors" in {
-    val reflective = ResourceExtension
-      .builder()
-      .setMapper(testMapper)
-      .addProvider(classOf[JwtAuthFilter])
-      .addProvider(classOf[UnauthorizedExceptionMapper])
-      .addProvider(classOf[RolesAllowedDynamicFeature])
-      .addResource(classOf[LiteLLMProxyResource])
-      .addResource(classOf[LiteLLMModelsResource])
-      .build()
-    reflective.before()
-    try {
-      val chat = reflective.target("/chat/completions").request(MediaType.APPLICATION_JSON).get()
-      // Unauthenticated GET on the POST-only chat path: we just need any
-      // response that proves Jersey wired the resource (4xx is fine; an
-      // instantiation failure surfaces as 500 or a test setup error).
-      chat.getStatus should (be >= 400 and be < 500)
-
-      val models = reflective.target("/models").request(MediaType.APPLICATION_JSON).get()
-      models.getStatus shouldBe 401
-    } finally {
-      reflective.after()
-    }
-  }
-}
diff --git a/agent-service/.env.example b/agent-service/.env.example
index 605f157ca0c..d7a45e41f59 100644
--- a/agent-service/.env.example
+++ b/agent-service/.env.example
@@ -8,12 +8,12 @@ TEXERA_SERVICE_LOG_LEVEL=INFO
 # Human-readable dev logs via pino-pretty.
 LOG_PRETTY=true
 
-# LLM_API_KEY authenticates this service to the gateway — NOT the
-# upstream provider key (OpenAI / Anthropic / etc.), which is
-# configured inside the gateway. "dummy" works when the gateway
-# does not enforce a key (e.g. LiteLLM with no master_key).
-LLM_API_KEY=dummy
-LLM_ENDPOINT=http://localhost:9096
+# The agent service is a trusted backend, so it calls the LiteLLM gateway
+# directly with the master key. LITELLM_MASTER_KEY is LiteLLM's master key
+# (NOT an upstream provider key); "dummy" works when LiteLLM runs without a
+# master_key. LITELLM_BASE_URL points at the LiteLLM proxy.
+LITELLM_MASTER_KEY=dummy
+LITELLM_BASE_URL=http://localhost:4000
 
 # Texera backend services
 TEXERA_DASHBOARD_SERVICE_ENDPOINT=http://localhost:8080
diff --git a/agent-service/src/api/backend-api.ts b/agent-service/src/api/backend-api.ts
index ffd2c59433f..d19eda5bc3f 100644
--- a/agent-service/src/api/backend-api.ts
+++ b/agent-service/src/api/backend-api.ts
@@ -21,14 +21,14 @@ import { env } from "../config/env";
 
 interface BackendConfig {
   apiEndpoint: string;
-  modelsEndpoint: string;
+  litellmBaseUrl: string;
   compileEndpoint: string;
   executionEndpoint: string;
 }
 
 const currentConfig: BackendConfig = {
   apiEndpoint: env.TEXERA_DASHBOARD_SERVICE_ENDPOINT,
-  modelsEndpoint: env.LLM_ENDPOINT,
+  litellmBaseUrl: env.LITELLM_BASE_URL,
   compileEndpoint: env.WORKFLOW_COMPILING_SERVICE_ENDPOINT,
   executionEndpoint: env.WORKFLOW_EXECUTION_SERVICE_ENDPOINT,
 };
diff --git a/agent-service/src/config/env.ts b/agent-service/src/config/env.ts
index 16a25b9be77..d8828d0647d 100644
--- a/agent-service/src/config/env.ts
+++ b/agent-service/src/config/env.ts
@@ -22,7 +22,9 @@ import { z } from "zod";
 const EnvSchema = z.object({
   PORT: z.coerce.number().default(3001),
   API_PREFIX: z.string().default("/api"),
-  LLM_API_KEY: z.string().default("dummy"),
+  // Master key for the LiteLLM gateway. The agent service is a trusted backend,
+  // so it holds this secret and calls LiteLLM directly (no access-control proxy).
+  LITELLM_MASTER_KEY: z.string().default("dummy"),
   TEXERA_SERVICE_LOG_LEVEL: z
     .enum(["ERROR", "WARN", "INFO", "DEBUG"])
     .transform(v => v.toLowerCase() as "error" | "warn" | "info" | "debug")
@@ -30,7 +32,7 @@ const EnvSchema = z.object({
   LOG_PRETTY: z.coerce.boolean().default(false),
 
   TEXERA_DASHBOARD_SERVICE_ENDPOINT: z.string().url().default("http://localhost:8080"),
-  LLM_ENDPOINT: z.string().url().default("http://localhost:9096"),
+  LITELLM_BASE_URL: z.string().url().default("http://localhost:4000"),
   WORKFLOW_COMPILING_SERVICE_ENDPOINT: z.string().url().default("http://localhost:9090"),
   WORKFLOW_EXECUTION_SERVICE_ENDPOINT: z.string().url().default("http://localhost:8085"),
   EXECUTION_ENDPOINT_TEMPLATE: z.string().optional(),
diff --git a/agent-service/src/server.ts b/agent-service/src/server.ts
index d5eeae82c9b..2b537d09105 100644
--- a/agent-service/src/server.ts
+++ b/agent-service/src/server.ts
@@ -52,9 +52,11 @@ async function createAgentInstance(
   const agentId = `agent-${++agentCounter}`;
   const config = getBackendConfig();
 
+  // The agent service is a trusted backend, so it calls the LiteLLM gateway
+  // directly with the master key (no access-control proxy hop).
   const openai = createOpenAI({
-    baseURL: `${config.modelsEndpoint}/api`,
-    apiKey: env.LLM_API_KEY,
+    baseURL: config.litellmBaseUrl,
+    apiKey: env.LITELLM_MASTER_KEY,
   });
 
   // Reasoning effort variants are configured as separate model entries in litellm-config.yaml
@@ -165,6 +167,26 @@ const agentsRouter = new Elysia({ prefix: "/agents" })
     return { agents: agentList };
   })
 
+  // Lists the models available on the LiteLLM gateway. Previously the frontend
+  // hit a LiteLLM proxy on the access-control-service; the agent service now
+  // owns this since it already holds the master key and talks to LiteLLM.
+  .get("/models", async ({ set }) => {
+    const { litellmBaseUrl } = getBackendConfig();
+    try {
+      const response = await fetch(`${litellmBaseUrl}/models`, {
+        headers: { Authorization: `Bearer ${env.LITELLM_MASTER_KEY}` },
+      });
+      if (!response.ok) {
+        set.status = 502;
+        return { error: `Failed to fetch models from LiteLLM: ${response.status} ${response.statusText}` };
+      }
+      return await response.json();
+    } catch (error) {
+      set.status = 502;
+      return { error: `Failed to fetch models from LiteLLM: ${error instanceof Error ? error.message : String(error)}` };
+    }
+  })
+
   .post(
     "/",
     async ({ body }) => {
@@ -630,8 +652,8 @@ function printStartupMessage(app: ReturnType<typeof buildApp>) {
 
   console.log("");
   console.log("Environment:");
-  console.log(`  LLM_API_KEY: ${env.LLM_API_KEY === "dummy" ? "dummy (default)" : "set"}`);
-  console.log(`  LLM_ENDPOINT: ${getBackendConfig().modelsEndpoint}`);
+  console.log(`  LITELLM_MASTER_KEY: ${env.LITELLM_MASTER_KEY === "dummy" ? "dummy (default)" : "set"}`);
+  console.log(`  LITELLM_BASE_URL: ${getBackendConfig().litellmBaseUrl}`);
   console.log(`  WORKFLOW_COMPILING_SERVICE_ENDPOINT: ${getBackendConfig().compileEndpoint}`);
   console.log(`  TEXERA_DASHBOARD_SERVICE_ENDPOINT: ${getBackendConfig().apiEndpoint}`);
   console.log("");
diff --git a/bin/k8s/templates/access-control-service-deployment.yaml b/bin/k8s/templates/access-control-service-deployment.yaml
index 99713e70713..d3c1ac513c4 100644
--- a/bin/k8s/templates/access-control-service-deployment.yaml
+++ b/bin/k8s/templates/access-control-service-deployment.yaml
@@ -50,16 +50,6 @@ spec:
               value: {{ .Values.workflowComputingUnitPool.name }}
             - name: KUBERNETES_COMPUTE_UNIT_POOL_NAMESPACE
               value: {{ .Values.workflowComputingUnitPool.namespace }}
-            {{- if .Values.litellm.enabled }}
-            # LLM gateway used to serve /api/chat and /api/models to the agent service.
-            - name: LITELLM_BASE_URL
-              value: http://{{ .Values.litellm.name }}-svc:{{ .Values.litellm.service.port }}
-            - name: LITELLM_MASTER_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .Release.Name }}-agent-service-secret
-                  key: litellm-master-key
-            {{- end }}
             {{- range .Values.texeraEnvVars }}
             - name: {{ .name }}
               value: "{{ .value }}"
diff --git a/bin/k8s/templates/agent-service-deployment.yaml b/bin/k8s/templates/agent-service-deployment.yaml
index 5437cd7d9cd..98a38f2408a 100644
--- a/bin/k8s/templates/agent-service-deployment.yaml
+++ b/bin/k8s/templates/agent-service-deployment.yaml
@@ -46,21 +46,21 @@ spec:
             # Dashboard service: workflow CRUD + operator metadata.
             - name: TEXERA_DASHBOARD_SERVICE_ENDPOINT
               value: http://{{ .Values.webserver.name }}-svc:{{ .Values.webserver.service.port }}
-            # LLM gateway: access-control-service serves /api/chat and /api/models,
-            # forwarding to LiteLLM (mirrors the single-node nginx routing).
-            - name: LLM_ENDPOINT
-              value: http://{{ .Values.accessControlService.name }}-svc:{{ .Values.accessControlService.service.port }}
+            # The agent service is a trusted backend, so it calls LiteLLM
+            # directly with the master key (no access-control proxy hop).
+            - name: LITELLM_BASE_URL
+              value: http://{{ .Values.litellm.name }}-svc:{{ .Values.litellm.service.port }}
             - name: WORKFLOW_COMPILING_SERVICE_ENDPOINT
               value: http://{{ .Values.workflowCompilingService.name }}-svc:{{ .Values.workflowCompilingService.service.port }}
             # Per-computing-unit execution endpoint; "{cuid}" is substituted with the
             # computing unit id at request time.
             - name: EXECUTION_ENDPOINT_TEMPLATE
               value: http://computing-unit-{cuid}.{{ .Values.workflowComputingUnitPool.name }}-svc.{{ .Values.workflowComputingUnitPool.namespace }}.svc.cluster.local:{{ .Values.workflowComputingUnitPool.service.port }}
-            - name: LLM_API_KEY
+            - name: LITELLM_MASTER_KEY
               valueFrom:
                 secretKeyRef:
                   name: {{ .Release.Name }}-agent-service-secret
-                  key: llm-api-key
+                  key: litellm-master-key
           # The service loads operator metadata from the dashboard service on
           # startup, so gate readiness on its health endpoint before the gateway
           # routes traffic here. /api/healthcheck needs no auth.
diff --git a/bin/k8s/templates/agent-service-secret.yaml b/bin/k8s/templates/agent-service-secret.yaml
index 61746a0aeb8..32ea2e4caa0 100644
--- a/bin/k8s/templates/agent-service-secret.yaml
+++ b/bin/k8s/templates/agent-service-secret.yaml
@@ -15,9 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Shared secret for the agent service and LiteLLM. Holds the agent's gateway
-# key, LiteLLM's master key, and the upstream provider API keys. Provide real
-# values via `--set` or a values override file; do not commit them.
+# Shared secret for the agent service and LiteLLM. Holds LiteLLM's master key
+# (used by both the agent service and LiteLLM) and the upstream provider API
+# keys. Provide real values via `--set` or a values override file; do not
+# commit them.
 {{- if or .Values.agentService.enabled .Values.litellm.enabled }}
 apiVersion: v1
 kind: Secret
@@ -26,7 +27,6 @@ metadata:
   namespace: {{ .Release.Namespace }}
 type: Opaque
 stringData:
-  llm-api-key: "{{ .Values.agentService.env.llmApiKey }}"
   litellm-master-key: "{{ .Values.litellm.masterKey }}"
   {{- range $key, $value := .Values.litellm.providerApiKeys }}
   {{ $key }}: "{{ $value }}"
diff --git a/bin/k8s/templates/gateway-routes.yaml b/bin/k8s/templates/gateway-routes.yaml
index ac8096aa212..5fdb3da7c8d 100644
--- a/bin/k8s/templates/gateway-routes.yaml
+++ b/bin/k8s/templates/gateway-routes.yaml
@@ -66,16 +66,6 @@ spec:
       backendRefs:
         - name: config-service-svc
           port: 9094
-    - matches:
-        - path:
-            type: PathPrefix
-            value: /api/models
-        - path:
-            type: PathPrefix
-            value: /api/chat
-      backendRefs:
-        - name: access-control-service-svc
-          port: 9096
     - matches:
         - path:
             type: PathPrefix
diff --git a/bin/k8s/values-development.yaml b/bin/k8s/values-development.yaml
index 5537b39acc3..dc7078e4688 100644
--- a/bin/k8s/values-development.yaml
+++ b/bin/k8s/values-development.yaml
@@ -233,10 +233,6 @@ agentService:
   service:
     type: ClusterIP
     port: 3001
-  env:
-    # Authenticates the agent service to the in-cluster LLM gateway
-    # (access-control-service / LiteLLM), not to the upstream provider.
-    llmApiKey: "dummy"
 
 litellm:
   enabled: true
diff --git a/bin/k8s/values.yaml b/bin/k8s/values.yaml
index 32687a7ac5e..2974c27c882 100644
--- a/bin/k8s/values.yaml
+++ b/bin/k8s/values.yaml
@@ -215,10 +215,6 @@ agentService:
   service:
     type: ClusterIP
     port: 3001
-  env:
-    # Authenticates the agent service to the in-cluster LLM gateway
-    # (access-control-service / LiteLLM), not to the upstream provider.
-    llmApiKey: "dummy"
 
 litellm:
   enabled: true
diff --git a/bin/single-node/.env b/bin/single-node/.env
index 54aa2f5b322..bf3271a0021 100644
--- a/bin/single-node/.env
+++ b/bin/single-node/.env
@@ -87,13 +87,13 @@ FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT=http://file-service:9092/api/da
 # Toggles the texera agent panel; set false to hide it in the GUI.
 GUI_WORKFLOW_WORKSPACE_COPILOT_ENABLED=true
 
-# Litellm key and URL for internal LLM traffic
+# LiteLLM master key + URL for internal LLM traffic. Used by the litellm
+# service and by the agent-service, which calls LiteLLM directly for chat
+# completions and the model list.
 LITELLM_MASTER_KEY=sk-texera-internal-do-not-share
 LITELLM_BASE_URL=http://litellm:4000
 
 # Configurations for agent-service to connect to Texera's services
-LLM_ENDPOINT=http://nginx:8080
-LLM_API_KEY=dummy
 TEXERA_DASHBOARD_SERVICE_ENDPOINT=http://dashboard-service:8080
 WORKFLOW_COMPILING_SERVICE_ENDPOINT=http://workflow-compiling-service:9090
 WORKFLOW_EXECUTION_SERVICE_ENDPOINT=http://workflow-runtime-coordinator-service:8085
diff --git a/bin/single-node/docker-compose.yml b/bin/single-node/docker-compose.yml
index e26fe8aa957..9638769d22d 100644
--- a/bin/single-node/docker-compose.yml
+++ b/bin/single-node/docker-compose.yml
@@ -299,7 +299,7 @@ services:
       timeout: 3s
       retries: 10
 
-  # AccessControlService handles user permissions and proxies LLM API traffic
+  # AccessControlService handles user permissions and access control
   access-control-service:
     image: ${IMAGE_REGISTRY:-ghcr.io/apache}/texera-access-control-service:${IMAGE_TAG:-latest}
     container_name: access-control-service
diff --git a/bin/single-node/nginx.conf b/bin/single-node/nginx.conf
index 515a7016da0..70947c00792 100644
--- a/bin/single-node/nginx.conf
+++ b/bin/single-node/nginx.conf
@@ -57,19 +57,8 @@ http {
             proxy_set_header X-Real-IP $remote_addr;
         }
 
-        # LLM API traffic goes through access-control-service, which then forwards to LiteLLM.
-        location = /api/models {
-            proxy_pass http://access-control-service:9096;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-        }
-
-        location /api/chat/ {
-            proxy_pass http://access-control-service:9096;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-        }
-
+        # LLM traffic (chat + model list) is handled by the agent-service, which
+        # talks to LiteLLM directly; /api/agents below already routes there.
         location /api/agents {
             proxy_pass http://agent-service:3001;
             proxy_http_version 1.1;
diff --git a/common/config/src/main/resources/llm.conf b/common/config/src/main/resources/llm.conf
deleted file mode 100644
index 23b9360cdab..00000000000
--- a/common/config/src/main/resources/llm.conf
+++ /dev/null
@@ -1,27 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# LLM Configuration
-llm {
-  # Base URL for LiteLLM service
-  base-url = "http://0.0.0.0:4000"
-  base-url = ${?LITELLM_BASE_URL}
-
-  # Master key for LiteLLM authentication
-  master-key = ""
-  master-key = ${?LITELLM_MASTER_KEY}
-}
diff --git a/common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala b/common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala
deleted file mode 100644
index a85b734bad6..00000000000
--- a/common/config/src/main/scala/org/apache/texera/config/LLMConfig.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.texera.config
-
-import com.typesafe.config.{Config, ConfigFactory}
-
-object LLMConfig {
-  private val conf: Config = ConfigFactory.parseResources("llm.conf").resolve()
-
-  // LLM Service Configuration
-  val baseUrl: String = conf.getString("llm.base-url")
-  val masterKey: String = conf.getString("llm.master-key")
-}
diff --git a/docs/tutorials/guide-to-enable-llm-agent.md b/docs/tutorials/guide-to-enable-llm-agent.md
index efa2622b6fa..65cfd86500a 100644
--- a/docs/tutorials/guide-to-enable-llm-agent.md
+++ b/docs/tutorials/guide-to-enable-llm-agent.md
@@ -63,21 +63,10 @@ Modify `common/config/src/main/resources/gui.conf` to enable the agent feature:
 
 ## Step 5: Configure LiteLLM Connection (Optional)
 
-The `AccessControlService` acts as a gateway between the frontend and LiteLLM. If LiteLLM is running on a different host or port, modify `common/config/src/main/resources/llm.conf`:
-
-```diff
- llm {
-   # Base URL for LiteLLM service
--  base-url = "http://0.0.0.0:4000"
-+  base-url = "http://your-litellm-host:4000"
-
-   # Master key for LiteLLM authentication
--  master-key = ""
-+  master-key = "your-master-key"
- }
-```
-
-Alternatively, set environment variables:
+The `AgentService` is a trusted backend that calls LiteLLM directly — for both
+chat completions and the model list shown in the agent panel — using LiteLLM's
+master key. If LiteLLM is running on a different host or port, or is protected
+by a master key, configure the agent service via environment variables:
 
 ```bash
 export LITELLM_BASE_URL=http://your-litellm-host:4000
@@ -86,7 +75,7 @@ export LITELLM_MASTER_KEY=your-master-key
 
 ## Step 6: Start Texera Services
 
-Start the **all** Texera micro services, including the `AccessControlService`.
+Start **all** the Texera micro services, including the `AgentService`.
 
 ## Done!
 
diff --git a/frontend/proxy.config.json b/frontend/proxy.config.json
index f68602e0714..78be3483623 100755
--- a/frontend/proxy.config.json
+++ b/frontend/proxy.config.json
@@ -10,16 +10,6 @@
     "secure": false,
     "changeOrigin": true
   },
-"/api/models": {
-    "target": "http://localhost:9096",
-    "secure": false,
-    "changeOrigin": true
-  },
-  "/api/chat/completion": {
-    "target": "http://localhost:9096",
-    "secure": false,
-    "changeOrigin": true
-  },
   "/api/compile": {
     "target": "http://localhost:9090",
     "secure": false,
diff --git a/frontend/src/app/workspace/service/agent/agent.service.ts b/frontend/src/app/workspace/service/agent/agent.service.ts
index 2009734030b..2c922f5e085 100644
--- a/frontend/src/app/workspace/service/agent/agent.service.ts
+++ b/frontend/src/app/workspace/service/agent/agent.service.ts
@@ -864,7 +864,7 @@ export class AgentService {
    */
   public fetchModelTypes(): Observable<ModelType[]> {
     if (!this.modelTypes$) {
-      this.modelTypes$ = this.http.get<LiteLLMModelsResponse>(`${AppSettings.getApiEndpoint()}/models`).pipe(
+      this.modelTypes$ = this.http.get<LiteLLMModelsResponse>(`${AppSettings.getApiEndpoint()}/agents/models`).pipe(
         map(response =>
           response.data.map((model: LiteLLMModel) => ({
             id: model.id,

From b8f655a09ac1851ba06ec2b88612a5e34b3d76e4 Mon Sep 17 00:00:00 2001
From: Bob Bai <bobbai0509@gmail.com>
Date: Sun, 7 Jun 2026 21:19:20 -0700
Subject: [PATCH 2/4] style(agent): format server.ts models endpoint with
 prettier

---
 agent-service/src/server.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/agent-service/src/server.ts b/agent-service/src/server.ts
index 2b537d09105..2a328bb7c1d 100644
--- a/agent-service/src/server.ts
+++ b/agent-service/src/server.ts
@@ -183,7 +183,9 @@ const agentsRouter = new Elysia({ prefix: "/agents" })
       return await response.json();
     } catch (error) {
       set.status = 502;
-      return { error: `Failed to fetch models from LiteLLM: ${error instanceof Error ? error.message : String(error)}` };
+      return {
+        error: `Failed to fetch models from LiteLLM: ${error instanceof Error ? error.message : String(error)}`,
+      };
     }
   })
 

From e9d8e174e2d3b237d91beef1796b252927237e97 Mon Sep 17 00:00:00 2001
From: Bob Bai <bobbai0509@gmail.com>
Date: Sun, 7 Jun 2026 21:23:16 -0700
Subject: [PATCH 3/4] test(agent): cover GET /api/agents/models success and
 LiteLLM failure paths

---
 agent-service/src/server.test.ts | 69 +++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/agent-service/src/server.test.ts b/agent-service/src/server.test.ts
index 0f618e599c2..5eb15b6e248 100644
--- a/agent-service/src/server.test.ts
+++ b/agent-service/src/server.test.ts
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-import { beforeEach, describe, expect, test } from "bun:test";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
 import { buildApp, _resetAgentStoreForTests } from "./server";
 import { env } from "./config/env";
 
@@ -200,6 +200,73 @@ describe("Agent control routes", () => {
   });
 });
 
+describe(`GET ${API}/agents/models`, () => {
+  // The endpoint fetches the model list from LiteLLM directly (the agent
+  // service holds the master key). Stub global fetch so no real LiteLLM is
+  // needed, and capture the outgoing request to assert URL + auth header.
+  const realFetch = globalThis.fetch;
+  let calls: Array<{ url: string; authorization: string | null }>;
+
+  function mockLiteLLM(responder: (reqUrl: string) => Response | Promise<Response>): void {
+    globalThis.fetch = (async (
+      input: Parameters<typeof fetch>[0],
+      init?: Parameters<typeof fetch>[1]
+    ): Promise<Response> => {
+      const reqUrl = String(input);
+      const authorization = new Headers(init?.headers).get("Authorization");
+      calls.push({ url: reqUrl, authorization });
+      return responder(reqUrl);
+    }) as typeof globalThis.fetch;
+  }
+
+  beforeEach(() => {
+    calls = [];
+  });
+
+  afterEach(() => {
+    globalThis.fetch = realFetch;
+  });
+
+  test("forwards the LiteLLM model list and calls it directly with the master key", async () => {
+    const upstream = {
+      data: [{ id: "gpt-5-mini", object: "model", created: 0, owned_by: "openai" }],
+      object: "list",
+    };
+    mockLiteLLM(
+      () => new Response(JSON.stringify(upstream), { status: 200, headers: { "Content-Type": "application/json" } })
+    );
+
+    const res = await getJson(`${API}/agents/models`);
+    expect(res.status).toBe(200);
+    expect(await readJson<typeof upstream>(res)).toEqual(upstream);
+
+    // One call, straight to <litellm-base>/models, carrying the master key.
+    expect(calls).toHaveLength(1);
+    expect(calls[0].url).toBe(`${env.LITELLM_BASE_URL}/models`);
+    expect(calls[0].authorization).toBe(`Bearer ${env.LITELLM_MASTER_KEY}`);
+  });
+
+  test("returns 502 when LiteLLM responds with a non-OK status", async () => {
+    mockLiteLLM(() => new Response("unauthorized", { status: 401 }));
+
+    const res = await getJson(`${API}/agents/models`);
+    expect(res.status).toBe(502);
+    const body = await readJson<{ error: string }>(res);
+    expect(body.error).toContain("Failed to fetch models from LiteLLM");
+  });
+
+  test("returns 502 when LiteLLM is unreachable", async () => {
+    mockLiteLLM(() => {
+      throw new Error("connect ECONNREFUSED 127.0.0.1:4000");
+    });
+
+    const res = await getJson(`${API}/agents/models`);
+    expect(res.status).toBe(502);
+    const body = await readJson<{ error: string }>(res);
+    expect(body.error).toContain("Failed to fetch models from LiteLLM");
+  });
+});
+
 describe(`PATCH ${API}/agents/:id/settings`, () => {
   test("updates settings and returns the new values", async () => {
     const created = await readJson<{ id: string }>(await postJson(`${API}/agents`, { modelType: "m" }));

From e291fd5a68b197a81f4db729adf33527e89ff84b Mon Sep 17 00:00:00 2001
From: Bob Bai <bobbai0509@gmail.com>
Date: Mon, 8 Jun 2026 00:28:58 -0700
Subject: [PATCH 4/4] feat(agent): authenticate agent-service requests via JWT
 + gateway ext_authz

Phase 1 authentication for the agent service (see #5561):

- agent-service: real HS256 JWT verification using the shared secret read from
  auth.conf (env AUTH_JWT_SECRET); a guard rejects unauthenticated REST (Bearer)
  and WebSocket (access-token query) requests. auth.conf is bundled into the image.
- access-control-service: authorize() gains an /api/agents branch that verifies
  the JWT and requires REGULAR/ADMIN, returning the trusted x-user-* headers
  (allow-all per-agent for now; per-agent ownership is deferred to #5302).
- gateways: nginx auth_request (single-node) and an Envoy SecurityPolicy (k8s)
  route /api/agents through the access-control-service; the agent-service
  deployment now receives AUTH_JWT_SECRET.
- frontend: attach the JWT to every agent call (Bearer on REST, access-token on
  the /react WebSocket).
- tests: agent-service jwt.test.ts + server.test.ts guard cases; access-control
  AgentAccessAuthSpec.
---
 .../resource/AccessControlResource.scala      |  60 +++++++-
 .../resource/AgentAccessAuthSpec.scala        |  94 +++++++++++++
 agent-service/src/api/auth-api.ts             |  42 ++----
 agent-service/src/config/env.ts               |   7 +
 agent-service/src/config/jwt.test.ts          |  94 +++++++++++++
 agent-service/src/config/jwt.ts               | 131 ++++++++++++++++++
 agent-service/src/server.test.ts              |  75 +++++++++-
 agent-service/src/server.ts                   |  22 ++-
 bin/agent-service.dockerfile                  |   4 +
 .../templates/agent-service-deployment.yaml   |   8 ++
 .../agent-service-security-policy.yaml        |  43 ++++++
 bin/single-node/nginx.conf                    |  22 +++
 .../workspace/service/agent/agent.service.ts  |  51 ++++---
 13 files changed, 599 insertions(+), 54 deletions(-)
 create mode 100644 access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala
 create mode 100644 agent-service/src/config/jwt.test.ts
 create mode 100644 agent-service/src/config/jwt.ts
 create mode 100644 bin/k8s/templates/agent-service-security-policy.yaml

diff --git a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala
index 8a225ad4c61..d279b9cf386 100644
--- a/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala
+++ b/access-control-service/src/main/scala/org/apache/texera/service/resource/AccessControlResource.scala
@@ -27,7 +27,7 @@ import org.apache.texera.auth.JwtParser.parseToken
 import org.apache.texera.auth.SessionUser
 import org.apache.texera.auth.util.{ComputingUnitAccess, HeaderField}
 import org.apache.texera.config.KubernetesConfig
-import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum
+import org.apache.texera.dao.jooq.generated.enums.{PrivilegeEnum, UserRoleEnum}
 
 import java.net.URLDecoder
 import java.nio.charset.StandardCharsets
@@ -44,6 +44,8 @@ object AccessControlResource extends LazyLogging {
   private val apiExecutionsStats: Regex = """.*/api/executions/[0-9]+/stats/[0-9]+.*""".r
   private val apiExecutionsResultExport: Regex = """.*/api/executions/result/export.*""".r
   private val pveRoute: Regex = """^/?(?:auth/)?(?:api/|wsapi/)?pve(?:/.*)?$""".r
+  // Agent service: authenticate any /api/agents request (Phase 1 — see #5561).
+  private val apiAgents: Regex = """.*/api/agents.*""".r
   // Path patterns whose cuid lives in the URL path rather than the query string.
   private val pvePvesCuidPath: Regex = """^/?(?:auth/)?(?:api/|wsapi/)?pve/pves/([0-9]+)$""".r
   private val pvePackagesCuidPath: Regex =
@@ -68,12 +70,68 @@ object AccessControlResource extends LazyLogging {
       case wsapiWorkflowWebsocket() | apiExecutionsStats() | apiExecutionsResultExport() |
           pveRoute() =>
         checkComputingUnitAccess(uriInfo, headers, bodyOpt)
+      case apiAgents() =>
+        checkAgentAccess(uriInfo, headers, bodyOpt)
       case _ =>
         logger.warn(s"No authorization logic for path: $path. Denying access.")
         Response.status(Response.Status.FORBIDDEN).build()
     }
   }
 
+  // Extract the bearer token from the access-token query param, the
+  // Authorization header, or a "token" field in the body (in that order).
+  private def extractBearerToken(
+      uriInfo: UriInfo,
+      headers: HttpHeaders,
+      bodyOpt: Option[String]
+  ): String = {
+    val qToken = Option(uriInfo.getQueryParameters().getFirst("access-token"))
+      .map(_.trim)
+      .filter(_.nonEmpty)
+    val hToken = Option(headers.getRequestHeader("Authorization"))
+      .flatMap(_.asScala.headOption)
+      .map(_.replaceFirst("(?i)^Bearer\\s+", "")) // case-insensitive "Bearer "
+      .map(_.trim)
+      .filter(_.nonEmpty)
+    val bToken = bodyOpt.flatMap(extractTokenFromBody)
+    qToken.orElse(hToken).orElse(bToken).getOrElse("")
+  }
+
+  // Phase 1 agent authorization: authenticate the JWT and require a
+  // REGULAR/ADMIN role. Any such user may reach any agent for now; per-agent
+  // ownership is deferred (see #5302 / #5561). On success, forward the user
+  // identity headers so the agent service can trust them.
+  private def checkAgentAccess(
+      uriInfo: UriInfo,
+      headers: HttpHeaders,
+      bodyOpt: Option[String]
+  ): Response = {
+    val token = extractBearerToken(uriInfo, headers, bodyOpt)
+    val userSession: Optional[SessionUser] =
+      try parseToken(token)
+      catch {
+        case e: Exception =>
+          logger.error(s"Failed parsing token for agent request: $e")
+          Optional.empty()
+      }
+
+    if (userSession.isEmpty) {
+      return Response.status(Response.Status.UNAUTHORIZED).build()
+    }
+
+    val user = userSession.get()
+    if (!(user.isRoleOf(UserRoleEnum.REGULAR) || user.isRoleOf(UserRoleEnum.ADMIN))) {
+      return Response.status(Response.Status.FORBIDDEN).build()
+    }
+
+    Response
+      .ok()
+      .header(HeaderField.UserId, user.getUid.toString)
+      .header(HeaderField.UserName, user.getName)
+      .header(HeaderField.UserEmail, user.getEmail)
+      .build()
+  }
+
   private def checkComputingUnitAccess(
       uriInfo: UriInfo,
       headers: HttpHeaders,
diff --git a/access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala b/access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala
new file mode 100644
index 00000000000..b723f6837a6
--- /dev/null
+++ b/access-control-service/src/test/scala/org/apache/texera/service/resource/AgentAccessAuthSpec.scala
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.service.resource
+
+import jakarta.ws.rs.core.{HttpHeaders, MultivaluedHashMap, Response, UriInfo}
+import org.apache.texera.auth.JwtAuth
+import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
+import org.apache.texera.dao.jooq.generated.tables.pojos.User
+import org.mockito.Mockito.{mock, when}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.net.URI
+import java.util.Collections
+
+/**
+  * Exercises the `/api/agents` branch of [[AccessControlResource.authorize]] —
+  * the Phase 1 ext_authz gate that authenticates the JWT and requires a
+  * REGULAR/ADMIN role. Tokens are minted with the same [[JwtAuth]] the rest of
+  * Texera uses, so this is a real signature round-trip against auth.conf.
+  */
+class AgentAccessAuthSpec extends AnyFlatSpec with Matchers {
+
+  private def token(role: UserRoleEnum): String = {
+    val u = new User()
+    u.setUid(7)
+    u.setName("agent-user")
+    u.setEmail("agent-user@example.com")
+    u.setGoogleId(null)
+    u.setRole(role)
+    JwtAuth.jwtToken(JwtAuth.jwtClaims(u, expireInDays = 1))
+  }
+
+  private def authorize(path: String, authHeader: Option[String]): Response = {
+    val uriInfo = mock(classOf[UriInfo])
+    when(uriInfo.getPath).thenReturn(path)
+    when(uriInfo.getRequestUri).thenReturn(URI.create(s"http://localhost/$path"))
+    when(uriInfo.getQueryParameters()).thenReturn(new MultivaluedHashMap[String, String]())
+
+    val headers = mock(classOf[HttpHeaders])
+    when(headers.getRequestHeaders).thenReturn(new MultivaluedHashMap[String, String]())
+    when(headers.getRequestHeader("Authorization")).thenReturn(
+      authHeader.map((h: String) => Collections.singletonList(h)).orNull
+    )
+
+    AccessControlResource.authorize(uriInfo, headers)
+  }
+
+  private val agentPath = "auth/api/agents/agent-1"
+
+  "authorize on /api/agents" should "return 200 for a REGULAR user" in {
+    authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.REGULAR)}")).getStatus shouldBe 200
+  }
+
+  it should "return 200 for an ADMIN user" in {
+    authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.ADMIN)}")).getStatus shouldBe 200
+  }
+
+  it should "forward the user identity headers on success" in {
+    val resp = authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.REGULAR)}"))
+    resp.getHeaderString("x-user-id") shouldBe "7"
+    resp.getHeaderString("x-user-name") shouldBe "agent-user"
+    resp.getHeaderString("x-user-email") shouldBe "agent-user@example.com"
+  }
+
+  it should "return 401 when no token is present" in {
+    authorize(agentPath, None).getStatus shouldBe 401
+  }
+
+  it should "return 401 for a malformed / mis-signed token" in {
+    authorize(agentPath, Some("Bearer not-a-real-jwt")).getStatus shouldBe 401
+  }
+
+  it should "return 403 for an INACTIVE-role user" in {
+    authorize(agentPath, Some(s"Bearer ${token(UserRoleEnum.INACTIVE)}")).getStatus shouldBe 403
+  }
+}
diff --git a/agent-service/src/api/auth-api.ts b/agent-service/src/api/auth-api.ts
index 087f93ac46f..02be1e0c1a1 100644
--- a/agent-service/src/api/auth-api.ts
+++ b/agent-service/src/api/auth-api.ts
@@ -18,43 +18,29 @@
  */
 
 import type { UserInfo } from "../types/agent";
+import { verifyToken } from "../config/jwt";
 
 export type { UserInfo } from "../types/agent";
+export { verifyToken } from "../config/jwt";
 
-function decodeJWT(token: string): any {
-  try {
-    const parts = token.split(".");
-    if (parts.length !== 3) {
-      throw new Error("Invalid JWT format");
-    }
-    return JSON.parse(Buffer.from(parts[1], "base64").toString("utf-8"));
-  } catch (error) {
-    throw new Error(`Failed to decode JWT: ${error}`);
-  }
-}
-
+/** Verify the token's signature + expiry and return its user claims. Throws on
+ *  an invalid/expired/mis-signed token. */
 export function extractUserFromToken(token: string): UserInfo {
-  const payload = decodeJWT(token);
+  const user = verifyToken(token);
+  if (!user) {
+    throw new Error("Invalid or expired token");
+  }
   return {
-    uid: payload.userId,
-    name: payload.sub,
-    email: payload.email || "",
-    role: payload.role || "REGULAR",
+    uid: user.uid,
+    name: user.name,
+    email: user.email,
+    role: user.role,
   };
 }
 
-function isTokenExpired(token: string): boolean {
-  try {
-    const payload = decodeJWT(token);
-    if (!payload.exp) return false;
-    return Date.now() >= payload.exp * 1000;
-  } catch {
-    return true;
-  }
-}
-
+/** True only when the token is genuinely valid (signature + expiry verified). */
 export function validateToken(token: string): boolean {
-  return !isTokenExpired(token);
+  return verifyToken(token) !== null;
 }
 
 export function createAuthHeaders(token: string): Record<string, string> {
diff --git a/agent-service/src/config/env.ts b/agent-service/src/config/env.ts
index d8828d0647d..5066901e3bf 100644
--- a/agent-service/src/config/env.ts
+++ b/agent-service/src/config/env.ts
@@ -36,6 +36,13 @@ const EnvSchema = z.object({
   WORKFLOW_COMPILING_SERVICE_ENDPOINT: z.string().url().default("http://localhost:9090"),
   WORKFLOW_EXECUTION_SERVICE_ENDPOINT: z.string().url().default("http://localhost:8085"),
   EXECUTION_ENDPOINT_TEMPLATE: z.string().optional(),
+
+  // Shared JWT secret (HS256). Overrides the default in auth.conf, mirroring
+  // common/config AuthConfig. When unset, the secret is read from auth.conf.
+  AUTH_JWT_SECRET: z.string().optional(),
+  // Path to auth.conf; defaults are probed in config/jwt.ts (bundled in the
+  // image, or the repo path in local dev).
+  AUTH_CONF_PATH: z.string().optional(),
 });
 
 export const env = EnvSchema.parse(process.env);
diff --git a/agent-service/src/config/jwt.test.ts b/agent-service/src/config/jwt.test.ts
new file mode 100644
index 00000000000..244b79ad9ba
--- /dev/null
+++ b/agent-service/src/config/jwt.test.ts
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { describe, expect, test } from "bun:test";
+import { createHmac } from "node:crypto";
+import { JWT_SECRET, verifyToken } from "./jwt";
+
+function b64url(input: string | Buffer): string {
+  return Buffer.from(input).toString("base64url");
+}
+
+// Mints an HS256 token the same way org.apache.texera.auth.JwtAuth does.
+function sign(payload: Record<string, unknown>, opts: { secret?: string; alg?: string } = {}): string {
+  const alg = opts.alg ?? "HS256";
+  const secret = opts.secret ?? JWT_SECRET;
+  const header = b64url(JSON.stringify({ alg, typ: "JWT" }));
+  const body = b64url(JSON.stringify(payload));
+  const signature = b64url(createHmac("sha256", new TextEncoder().encode(secret)).update(`${header}.${body}`).digest());
+  return `${header}.${body}.${signature}`;
+}
+
+const now = () => Math.floor(Date.now() / 1000);
+const validClaims = () => ({
+  sub: "alice",
+  userId: 42,
+  email: "alice@example.com",
+  role: "REGULAR",
+  exp: now() + 3600,
+});
+
+describe("verifyToken", () => {
+  test("accepts a correctly-signed, unexpired token and returns its claims", () => {
+    const user = verifyToken(sign(validClaims()));
+    expect(user).not.toBeNull();
+    expect(user).toEqual({ uid: 42, name: "alice", email: "alice@example.com", role: "REGULAR" });
+  });
+
+  test("coerces a string userId claim to a number", () => {
+    const user = verifyToken(sign({ ...validClaims(), userId: "7" }));
+    expect(user?.uid).toBe(7);
+  });
+
+  test("rejects a token signed with a different secret", () => {
+    expect(verifyToken(sign(validClaims(), { secret: "not-the-real-secret" }))).toBeNull();
+  });
+
+  test("rejects a tampered payload (signature no longer matches)", () => {
+    const token = sign(validClaims());
+    const [h, , s] = token.split(".");
+    const forged = b64url(JSON.stringify({ ...validClaims(), userId: 999 }));
+    expect(verifyToken(`${h}.${forged}.${s}`)).toBeNull();
+  });
+
+  test("rejects an expired token (beyond the 30s clock skew)", () => {
+    expect(verifyToken(sign({ ...validClaims(), exp: now() - 60 }))).toBeNull();
+  });
+
+  test("accepts a just-expired token within the 30s clock skew", () => {
+    expect(verifyToken(sign({ ...validClaims(), exp: now() - 10 }))).not.toBeNull();
+  });
+
+  test("rejects a non-HS256 algorithm (none / alg-confusion)", () => {
+    expect(verifyToken(sign(validClaims(), { alg: "none" }))).toBeNull();
+  });
+
+  test("rejects tokens missing required exp / sub / userId claims", () => {
+    expect(verifyToken(sign({ sub: "alice", userId: 42 }))).toBeNull(); // no exp
+    expect(verifyToken(sign({ userId: 42, exp: now() + 60 }))).toBeNull(); // no sub
+    expect(verifyToken(sign({ sub: "alice", exp: now() + 60 }))).toBeNull(); // no userId
+  });
+
+  test("rejects malformed / empty tokens", () => {
+    expect(verifyToken(undefined)).toBeNull();
+    expect(verifyToken("")).toBeNull();
+    expect(verifyToken("not-a-jwt")).toBeNull();
+    expect(verifyToken("only.two")).toBeNull();
+  });
+});
diff --git a/agent-service/src/config/jwt.ts b/agent-service/src/config/jwt.ts
new file mode 100644
index 00000000000..ec10dccdaba
--- /dev/null
+++ b/agent-service/src/config/jwt.ts
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { createHmac, timingSafeEqual } from "node:crypto";
+import { existsSync, readFileSync } from "node:fs";
+import { resolve } from "node:path";
+import { env } from "./env";
+import { createLogger } from "../logger";
+
+const log = createLogger("Jwt");
+
+// Token issuance lives in the Scala services (org.apache.texera.auth.JwtAuth):
+// HS256 over the UTF-8 bytes of the secret, with a required `exp` and `sub`
+// and a 30s allowed clock skew. This module mirrors the verification so the
+// agent service can validate the same tokens without a gateway.
+const CLOCK_SKEW_SECONDS = 30;
+
+// auth.conf default, used as the last-resort fallback when neither the env
+// override nor the file is available (matches AuthConfig's literal default).
+const AUTH_CONF_DEFAULT_SECRET = "8a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d";
+
+/** Read the `256-bit-secret` default out of auth.conf, if the file is found. */
+function readSecretFromAuthConf(): string | undefined {
+  const candidates = [
+    env.AUTH_CONF_PATH, // explicit override
+    "auth.conf", // bundled next to the app in the container image
+    "../common/config/src/main/resources/auth.conf", // repo layout in local dev
+  ].filter((p): p is string => typeof p === "string" && p.length > 0);
+
+  for (const candidate of candidates) {
+    const path = resolve(process.cwd(), candidate);
+    if (!existsSync(path)) continue;
+    // The first quoted `256-bit-secret = "..."` is the literal default; the
+    // second line is the `${?AUTH_JWT_SECRET}` env override (handled below).
+    const match = readFileSync(path, "utf-8").match(/256-bit-secret\s*=\s*"([^"]*)"/);
+    if (match) {
+      log.info({ path }, "loaded JWT secret default from auth.conf");
+      return match[1];
+    }
+  }
+  log.warn("auth.conf not found; falling back to the built-in default JWT secret");
+  return undefined;
+}
+
+// Resolution order mirrors HOCON's `256-bit-secret = "<default>"; 256-bit-secret
+// = ${?AUTH_JWT_SECRET}`, and the `.toLowerCase()` normalization AuthConfig
+// applies. ("random" is intentionally unsupported here — it cannot match across
+// processes, so deployments share a fixed secret.)
+export const JWT_SECRET: string = (
+  env.AUTH_JWT_SECRET ||
+  readSecretFromAuthConf() ||
+  AUTH_CONF_DEFAULT_SECRET
+).toLowerCase();
+
+const SECRET_BYTES = new TextEncoder().encode(JWT_SECRET);
+
+export interface JwtUser {
+  uid: number;
+  name: string;
+  email: string;
+  role: string;
+}
+
+function decodeSegment(segment: string): unknown {
+  return JSON.parse(Buffer.from(segment, "base64url").toString("utf-8"));
+}
+
+/**
+ * Verify an HS256 JWT issued by the Scala services and return its claims, or
+ * null if the token is missing, malformed, mis-signed, expired, or uses a
+ * different algorithm. Mirrors org.apache.texera.auth.JwtParser.parseToken.
+ */
+export function verifyToken(token: string | undefined | null): JwtUser | null {
+  if (!token) return null;
+  try {
+    const parts = token.split(".");
+    if (parts.length !== 3) return null;
+    const [headerB64, payloadB64, signatureB64] = parts;
+
+    const header = decodeSegment(headerB64) as { alg?: string };
+    // Reject anything that is not HS256 (defends against "none" / alg-confusion).
+    if (header.alg !== "HS256") return null;
+
+    const expected = createHmac("sha256", SECRET_BYTES).update(`${headerB64}.${payloadB64}`).digest();
+    const actual = Buffer.from(signatureB64, "base64url");
+    if (expected.length !== actual.length || !timingSafeEqual(expected, actual)) {
+      return null;
+    }
+
+    const payload = decodeSegment(payloadB64) as {
+      exp?: number;
+      sub?: string;
+      userId?: number | string;
+      email?: string;
+      role?: string;
+    };
+
+    // `exp` and `sub` are required by JwtAuth.jwtConsumer.
+    if (typeof payload.exp !== "number") return null;
+    if (typeof payload.sub !== "string" || payload.sub.length === 0) return null;
+    if (payload.userId === undefined || payload.userId === null) return null;
+
+    const nowSeconds = Math.floor(Date.now() / 1000);
+    if (nowSeconds > payload.exp + CLOCK_SKEW_SECONDS) return null;
+
+    return {
+      uid: Number(payload.userId),
+      name: payload.sub,
+      email: typeof payload.email === "string" ? payload.email : "",
+      role: typeof payload.role === "string" ? payload.role : "REGULAR",
+    };
+  } catch {
+    return null;
+  }
+}
diff --git a/agent-service/src/server.test.ts b/agent-service/src/server.test.ts
index 5eb15b6e248..eeff98fe742 100644
--- a/agent-service/src/server.test.ts
+++ b/agent-service/src/server.test.ts
@@ -18,8 +18,10 @@
  */
 
 import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { createHmac } from "node:crypto";
 import { buildApp, _resetAgentStoreForTests } from "./server";
 import { env } from "./config/env";
+import { JWT_SECRET } from "./config/jwt";
 
 const API = env.API_PREFIX;
 const app = buildApp();
@@ -28,11 +30,38 @@ function url(path: string): string {
   return `http://localhost${path}`;
 }
 
+// Mint a valid HS256 token (same shape JwtAuth issues) so requests pass the
+// agent-service auth guard; tests that probe the guard itself omit/forge it.
+function b64url(input: string | Buffer): string {
+  return Buffer.from(input).toString("base64url");
+}
+function signToken(claims: Record<string, unknown> = {}): string {
+  const header = b64url(JSON.stringify({ alg: "HS256", typ: "JWT" }));
+  const payload = b64url(
+    JSON.stringify({
+      sub: "tester",
+      userId: 1,
+      email: "tester@example.com",
+      role: "REGULAR",
+      exp: Math.floor(Date.now() / 1000) + 3600,
+      ...claims,
+    })
+  );
+  const signature = b64url(
+    createHmac("sha256", new TextEncoder().encode(JWT_SECRET)).update(`${header}.${payload}`).digest()
+  );
+  return `${header}.${payload}.${signature}`;
+}
+const VALID_TOKEN = signToken();
+function authHeaders(extra: Record<string, string> = {}): Record<string, string> {
+  return { Authorization: `Bearer ${VALID_TOKEN}`, ...extra };
+}
+
 async function postJson(path: string, body: unknown): Promise<Response> {
   return app.handle(
     new Request(url(path), {
       method: "POST",
-      headers: { "Content-Type": "application/json" },
+      headers: authHeaders({ "Content-Type": "application/json" }),
       body: JSON.stringify(body),
     })
   );
@@ -42,18 +71,18 @@ async function patchJson(path: string, body: unknown): Promise<Response> {
   return app.handle(
     new Request(url(path), {
       method: "PATCH",
-      headers: { "Content-Type": "application/json" },
+      headers: authHeaders({ "Content-Type": "application/json" }),
       body: JSON.stringify(body),
     })
   );
 }
 
 async function getJson(path: string): Promise<Response> {
-  return app.handle(new Request(url(path)));
+  return app.handle(new Request(url(path), { headers: authHeaders() }));
 }
 
 async function del(path: string): Promise<Response> {
-  return app.handle(new Request(url(path), { method: "DELETE" }));
+  return app.handle(new Request(url(path), { method: "DELETE", headers: authHeaders() }));
 }
 
 async function readJson<T = unknown>(res: Response): Promise<T> {
@@ -72,6 +101,44 @@ describe(`GET ${API}/healthcheck`, () => {
     expect(body.status).toBe("ok");
     expect(typeof body.timestamp).toBe("string");
   });
+
+  test("does not require a token (readiness probe)", async () => {
+    const res = await app.handle(new Request(url(`${API}/healthcheck`)));
+    expect(res.status).toBe(200);
+  });
+});
+
+describe("auth guard on agent routes", () => {
+  test("rejects a request with no Authorization header (401)", async () => {
+    const res = await app.handle(new Request(url(`${API}/agents`)));
+    expect(res.status).toBe(401);
+    expect(await readJson<{ error: string }>(res)).toEqual({ error: "Unauthorized" });
+  });
+
+  test("rejects an invalid / mis-signed token (401)", async () => {
+    const res = await app.handle(
+      new Request(url(`${API}/agents`), { headers: { Authorization: "Bearer not-a-valid-jwt" } })
+    );
+    expect(res.status).toBe(401);
+  });
+
+  test("rejects an expired token (401)", async () => {
+    const expired = signToken({ exp: Math.floor(Date.now() / 1000) - 3600 });
+    const res = await app.handle(
+      new Request(url(`${API}/agents`), { headers: { Authorization: `Bearer ${expired}` } })
+    );
+    expect(res.status).toBe(401);
+  });
+
+  test("accepts a valid token", async () => {
+    const res = await app.handle(new Request(url(`${API}/agents`), { headers: authHeaders() }));
+    expect(res.status).toBe(200);
+  });
+
+  test("guards the models endpoint too", async () => {
+    const res = await app.handle(new Request(url(`${API}/agents/models`)));
+    expect(res.status).toBe(401);
+  });
 });
 
 describe(`POST ${API}/agents`, () => {
diff --git a/agent-service/src/server.ts b/agent-service/src/server.ts
index 2a328bb7c1d..f208a1d877b 100644
--- a/agent-service/src/server.ts
+++ b/agent-service/src/server.ts
@@ -23,7 +23,7 @@ import { createOpenAI } from "@ai-sdk/openai";
 import { TexeraAgent } from "./agent/texera-agent";
 import { getVisibleResultHeaders } from "./agent/tools/tools-utility";
 import { getBackendConfig } from "./api/backend-api";
-import { extractUserFromToken, validateToken } from "./api/auth-api";
+import { extractUserFromToken, validateToken, verifyToken } from "./api/auth-api";
 import { retrieveWorkflow } from "./api/workflow-api";
 import { WorkflowSystemMetadata } from "./agent/util/workflow-system-metadata";
 import { env } from "./config/env";
@@ -162,6 +162,17 @@ const agentsRouter = new Elysia({ prefix: "/agents" })
     set.status = 500;
     return { error: errorMessage || "Internal server error" };
   })
+  // Authenticate every agent request by verifying the Bearer JWT ourselves
+  // (defense in depth — the gateway ext_authz also checks it, but this also
+  // covers direct access, e.g. bare-metal dev). The WebSocket route is guarded
+  // separately in its open() handler via the access-token query param.
+  .onBeforeHandle(({ request, set }) => {
+    const token = (request.headers.get("authorization") ?? "").replace(/^Bearer\s+/i, "").trim();
+    if (!verifyToken(token)) {
+      set.status = 401;
+      return { error: "Unauthorized" };
+    }
+  })
   .get("/", () => {
     const agentList = Array.from(agentStore.entries()).map(([id, agent]) => getAgentInfo(id, agent));
     return { agents: agentList };
@@ -509,6 +520,15 @@ export function buildApp() {
     )
     .ws(`${env.API_PREFIX}/agents/:id/react`, {
       open(ws) {
+        // Browsers can't set headers on a WebSocket, so the JWT arrives as the
+        // access-token query param (same convention as the workflow WS).
+        const token = (ws.data as any).query?.["access-token"];
+        if (!verifyToken(token)) {
+          ws.send(JSON.stringify({ type: "error", error: "Unauthorized" }));
+          ws.close();
+          return;
+        }
+
         const agentId = (ws.data as any).params?.id;
         wsLog.info({ agentId }, "client connected");
 
diff --git a/bin/agent-service.dockerfile b/bin/agent-service.dockerfile
index 7edf38287aa..5722644735a 100644
--- a/bin/agent-service.dockerfile
+++ b/bin/agent-service.dockerfile
@@ -35,6 +35,10 @@ RUN bun install --frozen-lockfile --production
 COPY agent-service/src ./src
 COPY agent-service/tsconfig.json ./
 
+# Shared JWT secret default (HS256) used to verify Bearer tokens. The effective
+# secret is AUTH_JWT_SECRET when set; otherwise this file's default is used.
+COPY common/config/src/main/resources/auth.conf ./auth.conf
+
 COPY agent-service/LICENSE-binary ./LICENSE
 COPY NOTICE ./NOTICE
 COPY DISCLAIMER ./DISCLAIMER
diff --git a/bin/k8s/templates/agent-service-deployment.yaml b/bin/k8s/templates/agent-service-deployment.yaml
index 98a38f2408a..c7846732f94 100644
--- a/bin/k8s/templates/agent-service-deployment.yaml
+++ b/bin/k8s/templates/agent-service-deployment.yaml
@@ -61,6 +61,14 @@ spec:
                 secretKeyRef:
                   name: {{ .Release.Name }}-agent-service-secret
                   key: litellm-master-key
+            # Shared JWT secret used to verify Bearer tokens. Sourced from the
+            # same texeraEnvVars entry the Scala services use, so they agree.
+            {{- range .Values.texeraEnvVars }}
+            {{- if eq .name "AUTH_JWT_SECRET" }}
+            - name: {{ .name }}
+              value: "{{ .value }}"
+            {{- end }}
+            {{- end }}
           # The service loads operator metadata from the dashboard service on
           # startup, so gate readiness on its health endpoint before the gateway
           # routes traffic here. /api/healthcheck needs no auth.
diff --git a/bin/k8s/templates/agent-service-security-policy.yaml b/bin/k8s/templates/agent-service-security-policy.yaml
new file mode 100644
index 00000000000..b88a31b6dcb
--- /dev/null
+++ b/bin/k8s/templates/agent-service-security-policy.yaml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Authenticates every /api/agents request through the access-control-service
+# before it reaches the agent service (mirrors the dynamic-routes ext-auth).
+# The access-control-service verifies the JWT and requires a REGULAR/ADMIN
+# role, then injects the trusted x-user-* identity headers.
+{{- if .Values.agentService.enabled }}
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: SecurityPolicy
+metadata:
+  name: {{ .Release.Name }}-agent-ext-auth
+  namespace: {{ .Release.Namespace }}
+spec:
+  targetRefs:
+    - group: gateway.networking.k8s.io
+      kind: HTTPRoute
+      name: {{ .Release.Name }}-agent-service-route
+  extAuth:
+    http:
+      backendRefs:
+        - name: {{ .Values.accessControlService.name }}-svc
+          port: {{ .Values.accessControlService.service.port }}
+      path: /api/auth
+      headersToBackend:
+        - x-user-id
+        - x-user-name
+        - x-user-email
+{{- end }}
diff --git a/bin/single-node/nginx.conf b/bin/single-node/nginx.conf
index 70947c00792..52091137a00 100644
--- a/bin/single-node/nginx.conf
+++ b/bin/single-node/nginx.conf
@@ -24,6 +24,13 @@ http {
     # Suppress per-request access logs; errors still go to error_log.
     access_log off;
 
+    # Upstream for the agent ext_authz subrequest. Declaring it as an upstream
+    # lets the variable proxy_pass below ($request_uri) resolve the name at
+    # startup, without needing a separate runtime resolver.
+    upstream access_control_authz {
+        server access-control-service:9096;
+    }
+
     server {
         listen 8080;
 
@@ -59,7 +66,10 @@ http {
 
         # LLM traffic (chat + model list) is handled by the agent-service, which
         # talks to LiteLLM directly; /api/agents below already routes there.
+        # Each request is first authenticated by the access-control-service
+        # (ext_authz) via the auth_request subrequest below.
         location /api/agents {
+            auth_request /_agent_auth;
             proxy_pass http://agent-service:3001;
             proxy_http_version 1.1;
             proxy_set_header Upgrade $http_upgrade;
@@ -70,6 +80,18 @@ http {
             proxy_send_timeout 1d;
         }
 
+        # Internal ext_authz subrequest: the access-control-service authorizes
+        # /api/agents by JWT. $request_uri preserves the original path so its
+        # /api/agents branch matches (same shape Envoy produces); the
+        # Authorization header and the WS access-token query ride along.
+        location = /_agent_auth {
+            internal;
+            proxy_pass http://access_control_authz/api/auth$request_uri;
+            proxy_pass_request_body off;
+            proxy_set_header Content-Length "";
+            proxy_set_header X-Original-URI $request_uri;
+        }
+
         location /api/ {
             proxy_pass http://dashboard-service:8080;
             proxy_set_header Host $host;
diff --git a/frontend/src/app/workspace/service/agent/agent.service.ts b/frontend/src/app/workspace/service/agent/agent.service.ts
index 2c922f5e085..8e17fab6fe6 100644
--- a/frontend/src/app/workspace/service/agent/agent.service.ts
+++ b/frontend/src/app/workspace/service/agent/agent.service.ts
@@ -240,6 +240,12 @@ export class AgentService {
    */
   private agentHeaders(agentId?: string): { headers: HttpHeaders } {
     let headers = new HttpHeaders();
+    // The access-control-service authorizes every agent request by JWT, so
+    // attach the Bearer token to all agent-service calls.
+    const token = AuthService.getAccessToken();
+    if (token) {
+      headers = headers.set("Authorization", `Bearer ${token}`);
+    }
     if (agentId) {
       const wid = this.agentStateTracking.get(agentId)?.workflowId;
       if (wid !== undefined) {
@@ -408,9 +414,12 @@ export class AgentService {
    * Start WebSocket connection for real-time ReActSteps updates
    */
   private startStatePolling(agentId: string, tracking: AgentStateTracking): void {
-    // Build WebSocket URL
+    // Build WebSocket URL. Browsers can't set headers on a WebSocket, so the
+    // JWT is passed as the access-token query param (same as the workflow WS).
     const wsProtocol = window.location.protocol === "https:" ? "wss:" : "ws:";
-    const wsUrl = `${wsProtocol}//${window.location.host}${this.AGENT_API_BASE}/agents/${agentId}/react`;
+    const token = AuthService.getAccessToken();
+    const tokenParam = token ? `?access-token=${encodeURIComponent(token)}` : "";
+    const wsUrl = `${wsProtocol}//${window.location.host}${this.AGENT_API_BASE}/agents/${agentId}/react${tokenParam}`;
 
     const ws = new WebSocket(wsUrl);
     tracking.websocket = ws;
@@ -712,7 +721,7 @@ export class AgentService {
         }
       }
 
-      return this.http.post<ApiAgentInfo>(`${this.AGENT_API_BASE}/agents`, body).pipe(
+      return this.http.post<ApiAgentInfo>(`${this.AGENT_API_BASE}/agents`, body, this.agentHeaders()).pipe(
         map(response => {
           const agentInfo: AgentInfo = {
             id: response.id,
@@ -792,7 +801,7 @@ export class AgentService {
    * Also syncs local cache with backend - removes any stale agents that no longer exist on the backend.
    */
   public getAllAgents(): Observable<AgentInfo[]> {
-    return this.http.get<ApiAgentListResponse>(`${this.AGENT_API_BASE}/agents`).pipe(
+    return this.http.get<ApiAgentListResponse>(`${this.AGENT_API_BASE}/agents`, this.agentHeaders()).pipe(
       map(response => {
         const agents = response.agents.map(a => ({
           id: a.id,
@@ -864,21 +873,23 @@ export class AgentService {
    */
   public fetchModelTypes(): Observable<ModelType[]> {
     if (!this.modelTypes$) {
-      this.modelTypes$ = this.http.get<LiteLLMModelsResponse>(`${AppSettings.getApiEndpoint()}/agents/models`).pipe(
-        map(response =>
-          response.data.map((model: LiteLLMModel) => ({
-            id: model.id,
-            name: this.formatModelName(model.id),
-            description: `Model: ${model.id}`,
-            icon: "robot",
-          }))
-        ),
-        catchError((error: unknown) => {
-          console.error("Failed to fetch models from API:", error);
-          return of([]);
-        }),
-        shareReplay(1)
-      );
+      this.modelTypes$ = this.http
+        .get<LiteLLMModelsResponse>(`${AppSettings.getApiEndpoint()}/agents/models`, this.agentHeaders())
+        .pipe(
+          map(response =>
+            response.data.map((model: LiteLLMModel) => ({
+              id: model.id,
+              name: this.formatModelName(model.id),
+              description: `Model: ${model.id}`,
+              icon: "robot",
+            }))
+          ),
+          catchError((error: unknown) => {
+            console.error("Failed to fetch models from API:", error);
+            return of([]);
+          }),
+          shareReplay(1)
+        );
     }
     return this.modelTypes$;
   }
@@ -1036,7 +1047,7 @@ export class AgentService {
    * The backend broadcasts headChange + visible steps via WebSocket to all clients.
    */
   public checkoutStep(agentId: string, stepId: string): Observable<any> {
-    return this.http.post(`${this.AGENT_API_BASE}/agents/${agentId}/checkout`, { stepId });
+    return this.http.post(`${this.AGENT_API_BASE}/agents/${agentId}/checkout`, { stepId }, this.agentHeaders(agentId));
   }
 
   /**