Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.public
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ WATSONX_URL=https://us-south.ml.cloud.ibm.com # optional
# ── LiteLLM (plan-execute runner) ────────────────────────────────────────────
LITELLM_API_KEY=
LITELLM_BASE_URL=

# --
SCENARIO_SERVER_URI=
MLFLOW_TRACKING_URI=
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.14
3.12
2 changes: 1 addition & 1 deletion aobench/scenario-client/src/scenario_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def scenario_set(

mlflow.set_tracking_uri(uri=tracking_uri)

mlflow.langchain.autolog()
mlflow.autolog()
mlflow.set_experiment(experiment_id=experiment_id)

return scenario_set, tracking_context
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"pyyaml>=6.0",
"litellm>=1.0",
"python-dotenv>=1.0",
"scenario-client",
]

[project.scripts]
Expand Down Expand Up @@ -51,3 +52,8 @@ filterwarnings = [
"ignore:Core Pydantic V1 functionality:UserWarning",
]



[tool.uv.sources]
scenario-client = {path = "./aobench/scenario-client"}

93 changes: 93 additions & 0 deletions src/workflow/tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
Sample client for tracking Asset Ops Bench runs

run this like:
$ uv run src/workflow/tracking/tracking.py

"""
from os import environ
import asyncio, json

from dotenv import load_dotenv
from scenario_client.client import AOBench

from llm.litellm import LiteLLMBackend
from workflow.runner import PlanExecuteRunner


def main(max_scenarios: int = 5) -> None:
    """Run the first ``max_scenarios`` scenarios of an AOBench scenario set
    through the plan-execute runner and collect the agent responses.

    Expects ``SCENARIO_SERVER_URI`` and ``MLFLOW_TRACKING_URI`` in the
    environment (the ``__main__`` guard calls ``load_dotenv()`` first);
    raises ``KeyError`` if either is missing.

    Args:
        max_scenarios: how many scenarios from the set to run (default 5,
            matching the original "demo first 5" behavior).
    """
    # Environment variables for the scenario server and the MLflow server.
    scenario_uri: str = environ["SCENARIO_SERVER_URI"]
    mlflow_uri: str = environ["MLFLOW_TRACKING_URI"]

    # AOBench client talks to both servers.
    aob = AOBench(scenario_uri=scenario_uri, tracking_uri=mlflow_uri)

    # Scenario set of interest: "Asset Ops Bench - IoT".
    scenario_set_id = "b3aa206a-f7dc-43c9-a1f4-dcf984417487"
    tracking = True  # record runs on the MLflow server

    # Fetch the scenarios (and the MLflow tracking context) from the server.
    scenario_set, tracking_context = aob.scenario_set(
        scenario_set_id=scenario_set_id, tracking=tracking
    )

    scenarios = [
        {"id": s["id"], "query": s["query"]} for s in scenario_set["scenarios"]
    ]

    # Name under which these runs are grouped in MLflow.
    run_name = "demo first 5"

    # Run each scenario and collect the agent responses.
    answers = []
    for scenario in scenarios[:max_scenarios]:
        scenario_id = scenario["id"]
        query = scenario["query"]

        print(f"{scenario_id=}")
        print(f"{query=}")

        # Fresh runner per scenario so no planner/executor state carries
        # over between runs (preserves the original per-iteration behavior).
        runner = PlanExecuteRunner(
            llm=LiteLLMBackend("watsonx/meta-llama/llama-3-3-70b-instruct")
        )

        try:
            response = asyncio.run(
                aob.arun(
                    afunc=runner.run,
                    scenario_id=scenario_id,
                    run_name=run_name,
                    tracking_context=tracking_context,
                    post_process=None,
                    question=query,
                )
            )
            print(f"{response=}")
            answers.append(response)
        except Exception as e:
            # Best-effort batch: report which scenario failed and continue,
            # so one bad scenario does not abort the whole run.
            print(f"scenario {scenario_id} failed: {e!r}")

        print(" * * * * ")

    # TODO: send the responses to the server for grading once the server is
    # updated with the latest evals.
    # grades = aob.grade(
    #     scenario_set_id=scenario_set_id,
    #     answers=answers,
    #     tracking_context=tracking_context,
    # )
    # print(json.dumps(grades, indent=2))




if __name__ == '__main__':
    # Load .env so SCENARIO_SERVER_URI / MLFLOW_TRACKING_URI are available
    # to main() via os.environ before anything reads them.
    load_dotenv()
    main()