Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion monai/bundle/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,8 @@ def run(
common parameters shown below will be added and can be passed through the `override` parameter of this method.

- ``"output_dir"``: the path to save mlflow tracking outputs locally, default to "<bundle root>/eval".
- ``"tracking_uri"``: uri to save mlflow tracking outputs, default to "/output_dir/mlruns".
- ``"tracking_uri"``: uri to save mlflow tracking outputs, default to a local SQLite database
at "<output_dir>/mlruns.db" with run artifacts kept under "<output_dir>/mlruns".
- ``"experiment_name"``: experiment name for this run, default to "monai_experiment".
- ``"run_name"``: the name of current run.
- ``"save_execute_config"``: whether to save the executed config files. It can be `False`, `/path/to/artifacts`
Expand Down
6 changes: 4 additions & 2 deletions monai/bundle/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,10 @@
"configs": {
# if no "output_dir" in the bundle config, default to "<bundle root>/eval"
"output_dir": "$@bundle_root + '/eval'",
# use URI to support linux, mac and windows os
"tracking_uri": "$monai.utils.path_to_uri(@output_dir) + '/mlruns'",
# MLflow 3.13+ rejects the filesystem (file store) tracking backend, so default tracking
# to a local SQLite database. The handler keeps run artifacts under "<output_dir>/mlruns"
# (next to the db). A URI is used so the path is valid on linux, mac and windows os.
"tracking_uri": "$monai.utils.path_to_sqlite_uri(@output_dir + '/mlruns.db')",
"experiment_name": "monai_experiment",
"run_name": None,
# may fill it at runtime
Expand Down
95 changes: 88 additions & 7 deletions monai/handlers/mlflow_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,16 @@
from torch.utils.data import Dataset

from monai.apps.utils import get_logger
from monai.utils import CommonKeys, IgniteInfo, ensure_tuple, flatten_dict, min_version, optional_import
from monai.utils import (
CommonKeys,
IgniteInfo,
ensure_tuple,
flatten_dict,
min_version,
optional_import,
path_to_sqlite_uri,
path_to_uri,
)

Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events")
mlflow, _ = optional_import("mlflow", descriptor="Please install mlflow before using MLFlowHandler.")
Expand Down Expand Up @@ -68,7 +77,14 @@ class MLFlowHandler:
tracking_uri: connects to a tracking URI. can also set the `MLFLOW_TRACKING_URI` environment
variable to have MLflow find a URI from there. in both cases, the URI can either be
an HTTP/HTTPS URI for a remote server, a database connection string, or a local path
to log data to a directory. The URI defaults to path `mlruns`.
to log data to a directory. When no ``tracking_uri`` is provided and the
``MLFLOW_TRACKING_URI`` environment variable is unset, the handler now
defaults to a local SQLite database backend at ``sqlite:///<cwd>/mlruns.db`` with
artifacts stored under ``<cwd>/mlruns``. The default was changed from the filesystem
(file store) backend because MLflow 3.13+ raises an exception for the file store unless
``MLFLOW_ALLOW_FILE_STORE=true`` is set; SQLite is the backend MLflow recommends and it
does not raise. Any explicitly provided ``tracking_uri`` (including a local file path or
``file://`` URI) is passed through unchanged.
for more details: https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.set_tracking_uri.
iteration_log: whether to log data to MLFlow when iteration completed, default to `True`.
``iteration_log`` can be also a function and it will be interpreted as an event filter
Expand Down Expand Up @@ -113,6 +129,11 @@ class MLFlowHandler:
optimizer_param_names: parameter names in the optimizer that need to be recorded during running the
workflow, default to `'lr'`.
close_on_complete: whether to close the mlflow run in `complete` phase in workflow, default to False.
artifact_location: the location to store run artifacts in, passed to MLflow when the experiment is
created. When ``None`` and a local SQLite backend is used (from the ``tracking_uri`` argument
or the ``MLFLOW_TRACKING_URI`` environment variable), it defaults to an ``mlruns`` directory
next to the database file; for other backends ``None`` lets MLflow decide based on the
``tracking_uri``. Has no effect if the experiment already exists.

For more details of MLFlow usage, please refer to: https://mlflow.org/docs/latest/index.html.

Expand Down Expand Up @@ -141,6 +162,7 @@ def __init__(
artifacts: str | Sequence[Path] | None = None,
optimizer_param_names: str | Sequence[str] = "lr",
close_on_complete: bool = False,
artifact_location: str | None = None,
) -> None:
self.iteration_log = iteration_log
self.epoch_log = epoch_log
Expand All @@ -156,6 +178,31 @@ def __init__(
self.experiment_param = experiment_param
self.artifacts = ensure_tuple(artifacts)
self.optimizer_param_names = ensure_tuple(optimizer_param_names)
# When no tracking_uri is provided, default to a local SQLite backend instead of the
# filesystem (file store) backend. MLflow 3.13+ raises for the file store unless
# `MLFLOW_ALLOW_FILE_STORE=true` is set, while SQLite is the recommended backend and does
# not raise. Artifacts cannot live inside a database, so by default they are stored under
# the `./mlruns` directory (where the previous file store default kept them) via the
# experiment `artifact_location`. Any explicitly provided tracking_uri is left unchanged.
self.artifact_location = artifact_location
# Resolve the effective tracking URI from the argument or the `MLFLOW_TRACKING_URI`
# environment variable, so both configure the artifact location the same way.
effective_tracking_uri = tracking_uri or os.environ.get("MLFLOW_TRACKING_URI")
# When neither is set, fall back to the local SQLite default described above.
if not effective_tracking_uri:
tracking_uri = effective_tracking_uri = path_to_sqlite_uri(os.path.join(os.getcwd(), "mlruns.db"))
# For a local SQLite backend, keep run artifacts in an `mlruns` directory next to the
# database file (mirroring the previous file-store layout) unless the caller set
# `artifact_location`. Other backends (e.g. a remote server) are left to MLflow to decide.
# Only `tracking_uri` is passed to the client, so an `MLFLOW_TRACKING_URI` env var is
# still resolved by MLflow itself.
if (
self.artifact_location is None
and effective_tracking_uri
and effective_tracking_uri.startswith("sqlite:///")
):
db_path = Path(effective_tracking_uri[len("sqlite:///") :])
self.artifact_location = path_to_uri(db_path.parent / "mlruns")
self.client = mlflow.MlflowClient(tracking_uri=tracking_uri if tracking_uri else None)
self.run_finish_status = mlflow.entities.RunStatus.to_string(mlflow.entities.RunStatus.FINISHED)
self.close_on_complete = close_on_complete
Expand Down Expand Up @@ -245,7 +292,12 @@ def _set_experiment(self):
try:
experiment = self.client.get_experiment_by_name(self.experiment_name)
if not experiment:
experiment_id = self.client.create_experiment(self.experiment_name)
# pass an explicit artifact_location (set for the default SQLite backend, or
# by the caller) so artifacts land in the intended directory; when it is
# None MLflow decides based on the tracking_uri.
experiment_id = self.client.create_experiment(
self.experiment_name, artifact_location=self.artifact_location
)
experiment = self.client.get_experiment(experiment_id)
break
except MlflowException as e:
Expand Down Expand Up @@ -336,14 +388,43 @@ def complete(self) -> None:
for artifact in artifact_list:
self.client.log_artifact(self.cur_run.info.run_id, artifact)

def _dispose_sqlite_store(self) -> None:
"""
Release MLflow's SQLAlchemy engine when a local SQLite tracking backend is used.

MLflow keeps the SQLite connection open for the lifetime of the client, which on
Windows prevents the database file from being deleted. MLflow exposes no public
client close/dispose API, so this reaches into its internals defensively to release
the engine. It is a no-op for non-SQLite backends.
"""
tracking_uri = getattr(self.client, "tracking_uri", "")
if not isinstance(tracking_uri, str) or not tracking_uri.startswith("sqlite:"):
return
store = getattr(getattr(self.client, "_tracking_client", None), "store", None)
if store is None:
return
dispose = getattr(store, "_dispose_engine", None)
if callable(dispose):
dispose()
else:
engine = getattr(store, "engine", None)
if engine is not None:
engine.dispose()
read_engine = getattr(store, "read_engine", None)
if read_engine is not None:
read_engine.dispose()

def close(self) -> None:
"""
Stop current running logger of MLFlow.
Stop current running logger of MLFlow and release local SQLite resources.

"""
if self.cur_run:
self.client.set_terminated(self.cur_run.info.run_id, self.run_finish_status)
self.cur_run = None
try:
if self.cur_run:
self.client.set_terminated(self.cur_run.info.run_id, self.run_finish_status)
self.cur_run = None
finally:
self._dispose_sqlite_store()

def epoch_completed(self, engine: Engine) -> None:
"""
Expand Down
1 change: 1 addition & 0 deletions monai/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
is_sqrt,
issequenceiterable,
list_to_dict,
path_to_sqlite_uri,
path_to_uri,
pprint_edges,
progress_bar,
Expand Down
16 changes: 16 additions & 0 deletions monai/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"save_obj",
"label_union",
"path_to_uri",
"path_to_sqlite_uri",
"pprint_edges",
"check_key_duplicates",
"CheckKeyDuplicatesYamlLoader",
Expand Down Expand Up @@ -727,6 +728,21 @@ def path_to_uri(path: PathLike) -> str:
return Path(path).absolute().as_uri()


def path_to_sqlite_uri(path: PathLike) -> str:
"""
Convert a database file path to a SQLite connection URI, e.g. for use as an MLflow
``tracking_uri``. If not an absolute path, it is converted to an absolute path first.

A forward-slash (POSIX) path is used so the URI is valid on Windows as well as POSIX:
on Windows this yields ``sqlite:///C:/path/db.sqlite`` and on POSIX ``sqlite:////path/db.sqlite``.

Args:
path: input database file path, can be a string or `Path` object.

"""
return f"sqlite:///{Path(path).absolute().as_posix()}"


def pprint_edges(val: Any, n_lines: int = 20) -> str:
"""
Pretty print the head and tail ``n_lines`` of ``val``, and omit the middle part if the part has more than 3 lines.
Expand Down
52 changes: 45 additions & 7 deletions tests/fl/monai_algo/test_fl_monai_algo.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from monai.fl.client.monai_algo import MonaiAlgo
from monai.fl.utils.constants import ExtraItems
from monai.fl.utils.exchange_object import ExchangeObject
from monai.utils import path_to_uri
from monai.utils import path_to_sqlite_uri
from tests.test_utils import SkipIfNoModule

_root_dir = Path(__file__).resolve().parents[2]
Expand Down Expand Up @@ -79,7 +79,7 @@
"save_execute_config": f"{_data_dir}/config_executed.json",
"trainer": {
"_target_": "MLFlowHandler",
"tracking_uri": path_to_uri(_data_dir) + "/mlflow_override",
"tracking_uri": path_to_sqlite_uri(os.path.join(_data_dir, "mlflow_override.db")),
"output_transform": "$monai.handlers.from_engine(['loss'], first=True)",
"close_on_complete": True,
},
Expand All @@ -103,7 +103,7 @@
workflow_type="train",
logging_file=_logging_file,
tracking="mlflow",
tracking_uri=path_to_uri(_data_dir) + "/mlflow_1",
tracking_uri=path_to_sqlite_uri(os.path.join(_data_dir, "mlflow_1.db")),
experiment_name="monai_eval1",
),
"config_filters_filename": os.path.join(_data_dir, "config_fl_filters.json"),
Expand All @@ -119,7 +119,7 @@
],
"eval_kwargs": {
"tracking": "mlflow",
"tracking_uri": path_to_uri(_data_dir) + "/mlflow_2",
"tracking_uri": path_to_sqlite_uri(os.path.join(_data_dir, "mlflow_2.db")),
"experiment_name": "monai_eval2",
},
"eval_workflow_name": "training",
Expand Down Expand Up @@ -179,6 +179,38 @@
]


def _dispose_sqlite_engines():
"""Dispose MLflow's open SQLAlchemy SQLite engines so the test ``.db`` files can be removed.

MLflow keeps a SQLite connection open for the lifetime of its client; on Windows that
locks the database file and breaks cleanup. ``MLFlowHandler.close()`` releases it, but a
workflow may finish without closing every handler, so dispose defensively here before
deleting the files. Scoped to the test's ``mlflow*.db`` backends so unrelated (e.g.
in-memory) sqlite engines elsewhere in the process are left untouched.
"""
import gc

try:
from sqlalchemy.engine import Engine
except ImportError:
return
gc.collect()
for obj in gc.get_objects():
# gc.get_objects() can include dead weakref proxies, whose isinstance() raises
# ReferenceError, so guard the whole inspection (ReferenceError is an Exception).
try:
if not isinstance(obj, Engine):
continue
url = obj.url
db = url.database if url.get_backend_name() == "sqlite" else None
# the test backends are all files named ``mlflow*.db``; match those only so
# unrelated (e.g. in-memory) sqlite engines in the process are left untouched.
if db and os.path.basename(db).startswith("mlflow"):
obj.dispose()
except Exception:
pass
Comment on lines +182 to +211
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Confirm SQLAlchemy is pinned/available and inspect the URL API.
fd -t f 'requirements.*\.txt|setup\.cfg|pyproject\.toml' | xargs rg -n -i 'sqlalchemy'

Repository: Project-MONAI/MONAI

Length of output: 47


🌐 Web query:

Does SQLAlchemy URL expose get_backend_name() and a database attribute in 1.4 and 2.0?

💡 Result:

Yes, the SQLAlchemy URL object exposes both the get_backend_name method and the database attribute in both SQLAlchemy 1.4 and 2.0 [1][2]. Both were formalized as part of the URL object's overhaul in version 1.4, where the URL object was redesigned to be an immutable namedtuple-like structure [3][4]. The get_backend_name method returns the database backend name (the portion of the drivername to the left of the plus sign) [1][2], and the database attribute provides access to the database name specified in the URL [1][2].

Citations:


🏁 Script executed:

#!/bin/bash
# Find where SQLAlchemy is declared/pinned in this repo.
rg -n --hidden --no-ignore-vcs -S "sqlalchemy" pyproject.toml setup.cfg setup.py requirements*.txt */requirements*.txt 2>/dev/null || true

Repository: Project-MONAI/MONAI

Length of output: 45


🌐 Web query:

sqlalchemy version pins MONAI supported versions (sqlalchemy) fl monai_algo?

💡 Result:

MONAI does not appear to pin SQLAlchemy versions. What I found in MONAI packaging/dependency metadata: - MONAI’s declared install requirements in setup.cfg list torch and numpy, plus many optional dependencies under extras; SQLAlchemy is not present in the shown install_requires or extras list [1][2]. - The MONAI PyPI page describes dependency versioning for major releases and points to requirements*.txt files, but the dependency table excerpt shown there does not include SQLAlchemy [3]. So there is no explicit “sqlalchemy version pin” I can cite from MONAI’s published dependency metadata for the monai/fl client algorithm component (monai_algo.py) either—monai_algo.py itself is code for FL client behavior and does not specify SQLAlchemy dependencies [4]. If you meant “MONAI FL (monai_algo) indirectly depends on SQLAlchemy via another library”, tell me which MONAI version you’re using (e.g., 1.3.0/1.4.0/1.5.x) and your install extras (e.g., monai[all]) and I can trace the transitive dependency chain from that exact lockfile/requirements output.

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail
# List likely dependency declaration files
fd -t f -i 'requirements*.txt|setup.cfg|pyproject.toml|setup.py' . || true

# Search for sqlalchemy/sqlalchemy in those files
for f in $(fd -t f -i 'requirements*.txt|setup.cfg|pyproject.toml|setup.py' .); do
  rg -n -i 'sqlalchemy' "$f" || true
done

Repository: Project-MONAI/MONAI

Length of output: 146


🏁 Script executed:

#!/bin/bash
set -euo pipefail
# Confirm there aren't other dependency declaration files referencing sqlalchemy.
rg -n -i --hidden --no-ignore-vcs "sqlalchemy" \
  -g 'requirements*.txt' -g 'setup.cfg' -g 'pyproject.toml' -g 'setup.py' -g 'Pipfile*' -g '*.yml' -g '*.yaml' \
  . | head -n 50

Repository: Project-MONAI/MONAI

Length of output: 45


Narrow the failure mode of _dispose_sqlite_engines() (SQLAlchemy URL API)

  • SQLAlchemy’s URL object includes both get_backend_name() and database in the 1.4/2.0 API, so the current attribute access matches upstream.
  • MONAI repo doesn’t pin SQLAlchemy in the searched dependency metadata; to avoid silently neutering cleanup on any transitive/version drift, narrow the except Exception: to expected errors (e.g., AttributeError) or gate with hasattr(url, "get_backend_name") / hasattr(url, "database") so failures aren’t fully swallowed.
🧰 Tools
🪛 Ruff (0.15.15)

[error] 208-209: try-except-pass detected, consider logging the exception

(S110)


[warning] 208-208: Do not catch blind exception: Exception

(BLE001)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/fl/monai_algo/test_fl_monai_algo.py` around lines 182 - 209, The broad
except in _dispose_sqlite_engines() hides unexpected errors when probing
SQLAlchemy Engine.url; narrow the failure mode by checking attributes or
catching AttributeError only: verify hasattr(url, "get_backend_name") and
hasattr(url, "database") before calling url.get_backend_name() and accessing
url.database (or replace the broad except Exception with except AttributeError)
so only missing-attribute issues are swallowed while other errors surface;
reference Engine, url.get_backend_name, and url.database in your change.



@SkipIfNoModule("ignite")
@SkipIfNoModule("mlflow")
class TestFLMonaiAlgo(unittest.TestCase):
Expand All @@ -202,8 +234,11 @@ def test_train(self, input_params):

# test experiment management
if "save_execute_config" in algo.train_workflow.parser:
self.assertTrue(os.path.exists(f"{_data_dir}/mlflow_override"))
shutil.rmtree(f"{_data_dir}/mlflow_override")
_dispose_sqlite_engines() # release SQLite handles so the db file can be removed on Windows
self.assertTrue(os.path.exists(f"{_data_dir}/mlflow_override.db"))
os.remove(f"{_data_dir}/mlflow_override.db")
if os.path.isdir(f"{_data_dir}/mlruns"):
shutil.rmtree(f"{_data_dir}/mlruns")
self.assertTrue(os.path.exists(f"{_data_dir}/config_executed.json"))
os.remove(f"{_data_dir}/config_executed.json")

Expand All @@ -225,9 +260,12 @@ def test_evaluate(self, input_params):

# test experiment management
if "save_execute_config" in algo.eval_workflow.parser:
_dispose_sqlite_engines() # release SQLite handles so the db files can be removed on Windows
self.assertGreater(len(list(glob.glob(f"{_data_dir}/mlflow_*"))), 0)
for f in list(glob.glob(f"{_data_dir}/mlflow_*")):
shutil.rmtree(f)
shutil.rmtree(f) if os.path.isdir(f) else os.remove(f)
if os.path.isdir(f"{_data_dir}/mlruns"):
shutil.rmtree(f"{_data_dir}/mlruns")
self.assertGreater(len(list(glob.glob(f"{_data_dir}/eval/config_*"))), 0)
for f in list(glob.glob(f"{_data_dir}/eval/config_*")):
os.remove(f)
Expand Down
Loading
Loading