Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.public
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ WATSONX_URL=https://us-south.ml.cloud.ibm.com # optional
# ── LiteLLM (plan-execute runner) ────────────────────────────────────────────
LITELLM_API_KEY=
LITELLM_BASE_URL=

# --
SCENARIO_SERVER_URI=
MLFLOW_TRACKING_URI=
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.14
3.12
2 changes: 1 addition & 1 deletion aobench/scenario-client/src/scenario_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def scenario_set(

mlflow.set_tracking_uri(uri=tracking_uri)

mlflow.langchain.autolog()
mlflow.autolog()
mlflow.set_experiment(experiment_id=experiment_id)

return scenario_set, tracking_context
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"pyyaml>=6.0",
"litellm>=1.0",
"python-dotenv>=1.0",
"scenario-client",
]

[project.scripts]
Expand Down Expand Up @@ -51,3 +52,8 @@ filterwarnings = [
"ignore:Core Pydantic V1 functionality:UserWarning",
]



[tool.uv.sources]
scenario-client = {path = "./aobench/scenario-client"}

93 changes: 93 additions & 0 deletions src/workflow/tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
Sample client for tracking Asset Ops Bench runs

run this like:
$ uv run src/workflow/tracking/tracking.py

"""
from os import environ
import asyncio, json

from dotenv import load_dotenv
from scenario_client.client import AOBench

from llm.litellm import LiteLLMBackend
from workflow.runner import PlanExecuteRunner


def main(max_scenarios: int = 5) -> None:
    """Run the first ``max_scenarios`` scenarios of an AOBench scenario set
    through the plan-execute runner and collect the agent responses.

    Expects ``SCENARIO_SERVER_URI`` and ``MLFLOW_TRACKING_URI`` in the
    environment (the ``__main__`` guard calls ``load_dotenv()`` first);
    raises ``KeyError`` if either is missing.

    Args:
        max_scenarios: how many scenarios from the set to run (default 5,
            matching the original "demo first 5" behavior).
    """
    # Environment variables for the scenario server and the MLflow server.
    scenario_uri: str = environ["SCENARIO_SERVER_URI"]
    mlflow_uri: str = environ["MLFLOW_TRACKING_URI"]

    # AOBench client talks to both servers.
    aob = AOBench(scenario_uri=scenario_uri, tracking_uri=mlflow_uri)

    # Scenario set of interest: "Asset Ops Bench - IoT".
    scenario_set_id = "b3aa206a-f7dc-43c9-a1f4-dcf984417487"
    tracking = True  # record runs on the MLflow server

    # Fetch the scenarios (and the MLflow tracking context) from the server.
    scenario_set, tracking_context = aob.scenario_set(
        scenario_set_id=scenario_set_id, tracking=tracking
    )

    scenarios = [
        {"id": s["id"], "query": s["query"]} for s in scenario_set["scenarios"]
    ]

    # Name under which these runs are grouped in MLflow.
    run_name = "demo first 5"

    # Run each scenario and collect the agent responses.
    answers = []
    for scenario in scenarios[:max_scenarios]:
        scenario_id = scenario["id"]
        query = scenario["query"]

        print(f"{scenario_id=}")
        print(f"{query=}")

        # Fresh runner per scenario so no planner/executor state carries
        # over between runs (preserves the original per-iteration behavior).
        runner = PlanExecuteRunner(
            llm=LiteLLMBackend("watsonx/meta-llama/llama-3-3-70b-instruct")
        )

        try:
            response = asyncio.run(
                aob.arun(
                    afunc=runner.run,
                    scenario_id=scenario_id,
                    run_name=run_name,
                    tracking_context=tracking_context,
                    post_process=None,
                    question=query,
                )
            )
            print(f"{response=}")
            answers.append(response)
        except Exception as e:
            # Best-effort batch: report which scenario failed and continue,
            # so one bad scenario does not abort the whole run.
            print(f"scenario {scenario_id} failed: {e!r}")

        print(" * * * * ")

    # TODO: send the responses to the server for grading once the server is
    # updated with the latest evals.
    # grades = aob.grade(
    #     scenario_set_id=scenario_set_id,
    #     answers=answers,
    #     tracking_context=tracking_context,
    # )
    # print(json.dumps(grades, indent=2))




if __name__ == '__main__':
    # Load .env so SCENARIO_SERVER_URI / MLFLOW_TRACKING_URI are available
    # to main() via os.environ before anything reads them.
    load_dotenv()
    main()