From f1aabd0091040e3de7fad6ddf43087a57fb24d9a Mon Sep 17 00:00:00 2001
From: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com>
Date: Tue, 3 Mar 2026 15:30:46 +0000
Subject: [PATCH 1/5] example for tracking runs in mlflow

Signed-off-by: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com>
---
 src/workflow/tracking.py | 93 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 src/workflow/tracking.py

diff --git a/src/workflow/tracking.py b/src/workflow/tracking.py
new file mode 100644
index 00000000..870a26f0
--- /dev/null
+++ b/src/workflow/tracking.py
@@ -0,0 +1,93 @@
+"""
+Sample client for tracking Asset Ops Bench runs
+
+run this like:
+$ uv run src/workflow/tracking.py
+
+"""
+from os import environ
+import asyncio, json
+
+from dotenv import load_dotenv
+from scenario_client.client import AOBench
+
+from llm.litellm import LiteLLMBackend
+from workflow.runner import PlanExecuteRunner
+
+
+def main():
+
+    # environment variables for scenario server and mlflow server
+    abi: str = environ["SCENARIO_SERVER_URI"]
+    mfi: str = environ["MLFLOW_TRACKING_URI"]
+
+    # AOBench client
+    aob = AOBench(scenario_uri=abi, tracking_uri=mfi)
+
+    # Pick the scenario set of interest
+    scenario_set_id = "b3aa206a-f7dc-43c9-a1f4-dcf984417487" #Asset Ops Bench - IoT
+    # enable tracking on mlflow
+    tracking = True
+
+    # get the scenarios from the server
+    scenario_set, tracking_context = aob.scenario_set(
+        scenario_set_id=scenario_set_id, tracking=tracking
+    )
+
+    scenarios = [
+        {"id": s["id"], "query": s["query"]} for s in scenario_set["scenarios"]
+    ]
+
+    # provide the name of this run
+    run_name = "demo first 5"
+
+    # Loop over first five scenarios and collect the agent responses
+    answers = []
+    for scenario in scenarios[:5]:
+        scenario_id = scenario["id"]
+        query = scenario["query"]
+
+        print(f"{scenario_id=}")
+        print(f"{query=}")
+
+        runner = 
PlanExecuteRunner(llm=LiteLLMBackend("watsonx/meta-llama/llama-3-3-70b-instruct")) + + + try: + response = asyncio.run( aob.arun( + afunc=runner.run, + scenario_id=scenario_id, + run_name=run_name, + tracking_context=tracking_context, + post_process=None, + question=query, + )) + print(f"{response=}") + + answers.append(response) + + + except Exception as e: + print(e) + + print(" * * * * ") + + + ## send the responses to the server for grading + ## server requires update w latest evals so this is commented out for now + #grades = aob.grade( + # scenario_set_id=scenario_set_id, + # answers=answers, + # tracking_context=tracking_context, + #) + + ## print the grading results to the console + #print(json.dumps(grades, indent=2)) + + + + +if __name__ == '__main__': + load_dotenv() + main() + From 4ec4e0b0828967bbfe06536e370bbd0a753e2dd2 Mon Sep 17 00:00:00 2001 From: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com> Date: Tue, 3 Mar 2026 15:31:11 +0000 Subject: [PATCH 2/5] use generic autolog Signed-off-by: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com> --- aobench/scenario-client/src/scenario_client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aobench/scenario-client/src/scenario_client/client.py b/aobench/scenario-client/src/scenario_client/client.py index 730a0a11..ab06d10b 100644 --- a/aobench/scenario-client/src/scenario_client/client.py +++ b/aobench/scenario-client/src/scenario_client/client.py @@ -204,7 +204,7 @@ def scenario_set( mlflow.set_tracking_uri(uri=tracking_uri) - mlflow.langchain.autolog() + mlflow.autolog() mlflow.set_experiment(experiment_id=experiment_id) return scenario_set, tracking_context From ae07e4cc2a48ca5c2e65ef0acd1794016a7f3b14 Mon Sep 17 00:00:00 2001 From: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com> Date: Tue, 3 Mar 2026 15:31:36 +0000 Subject: [PATCH 3/5] add scenario-client dep Signed-off-by: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com> --- 
pyproject.toml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 905ed17d..e5e33078 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "pyyaml>=6.0",
     "litellm>=1.0",
     "python-dotenv>=1.0",
+    "scenario-client",
 ]
 
 [project.scripts]
@@ -51,3 +52,8 @@ filterwarnings = [
     "ignore:Core Pydantic V1 functionality:UserWarning",
 ]
+
+
+[tool.uv.sources]
+scenario-client = {path = "./aobench/scenario-client"}
+

From c9d39c626322c4b5e44171b8e3b6a36b0f1db7a2 Mon Sep 17 00:00:00 2001
From: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com>
Date: Tue, 3 Mar 2026 15:31:58 +0000
Subject: [PATCH 4/5] envs for tracking

Signed-off-by: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com>
---
 .env.public | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.env.public b/.env.public
index a2be134a..0f24f270 100644
--- a/.env.public
+++ b/.env.public
@@ -12,3 +12,7 @@ WATSONX_URL=https://us-south.ml.cloud.ibm.com # optional
 # ── LiteLLM (plan-execute runner) ────────────────────────────────────────────
 LITELLM_API_KEY=
 LITELLM_BASE_URL=
+
+# ── Asset Ops Bench tracking (scenario server & MLflow) ──────────────────────
+SCENARIO_SERVER_URI=
+MLFLOW_TRACKING_URI=

From f495eb14fbbad33508e5b4d0ed0ad7d9f91b35d1 Mon Sep 17 00:00:00 2001
From: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com>
Date: Tue, 3 Mar 2026 15:32:17 +0000
Subject: [PATCH 5/5] my local needs python 3.12

Signed-off-by: Bradley Eck <7848456+bradleyjeck@users.noreply.github.com>
---
 .python-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.python-version b/.python-version
index 6324d401..e4fba218 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.14
+3.12