From a1d74b40d648a948df78cbceedc17fe5541a1c13 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 19:59:15 +0000 Subject: [PATCH 1/3] Add --html flag for email-friendly HTML table output Agent-Logs-Url: https://github.com/FertigLab/ontrack/sessions/681c0cc2-9919-46f5-ac21-91ae05b69f37 Co-authored-by: dimalvovs <1246862+dimalvovs@users.noreply.github.com> --- ontrack.py | 232 +++++++++++++++++++++++++++ tests/test_ontrack.py | 354 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 586 insertions(+) diff --git a/ontrack.py b/ontrack.py index 49e9f49..7bfdbcb 100644 --- a/ontrack.py +++ b/ontrack.py @@ -47,6 +47,7 @@ import fnmatch import functools import grp +import html import logging import os import pathlib @@ -687,6 +688,204 @@ def print_report(report_data: dict) -> None: ) +_CSS_TABLE = "border-collapse: collapse; font-family: sans-serif;" +_CSS_TH = ( + "border: 1px solid #ccc; padding: 6px 12px; " + "background-color: #f0f0f0; text-align: left;" +) +_CSS_TD = "border: 1px solid #ccc; padding: 6px 12px;" + + +def _th(text: str) -> str: + """Return an HTML ```` element string with standard table heading style. + + Args: + text: Cell text; HTML-escaped before insertion. + """ + return f'{html.escape(str(text))}' + + +def _td(value: object) -> str: + """Return an HTML ```` element string with standard table cell style. + + Args: + value: Cell value; converted to ``str`` and HTML-escaped before insertion. + """ + return f'{html.escape(str(value))}' + + +def _collect_meta_keys(entries: list[dict]) -> list[str]: + """Return an ordered list of metadata field names found across *entries*. + + Fields listed in :data:`_REQUIRED_METADATA_FIELDS` appear first (in their + defined order), followed by any additional keys in the order they are first + encountered while iterating over *entries*. + + Args: + entries: List of directory entry dicts as returned by + :func:`_build_directory_entry`. + """ + seen: set[str] = set() + keys: list[str] = [] + # Required fields come first. + for field in _REQUIRED_METADATA_FIELDS: + for entry in entries: + meta = entry.get("metadata") or {} + if field in meta and field not in seen: + keys.append(field) + seen.add(field) + break + # Remaining fields in encounter order. + for entry in entries: + meta = entry.get("metadata") or {} + for key in meta: + if key not in seen: + keys.append(key) + seen.add(key) + return keys + + +def _entry_to_html_row( + entry: dict, + has_groups: bool, + has_sizes: bool, + meta_keys: list[str], +) -> str: + """Return an HTML ```` string for a single directory *entry*. + + Args: + entry: Directory entry dict as returned by :func:`_build_directory_entry`. + has_groups: Whether a Groups column is present in the table. + has_sizes: Whether Files and Total Size columns are present. + meta_keys: Ordered list of metadata field names; one ```` is emitted + per key (empty string when the entry has no value for that key). + + Returns: + A ``...`` HTML string with all cells inline-styled. + """ + cells = [ + _td(entry.get("directory", "")), + _td(entry.get("username", "")), + ] + if has_groups: + groups = entry.get("groups") or [] + cells.append(_td(", ".join(groups))) + if has_sizes: + cells.append(_td(entry.get("file_count", ""))) + cells.append(_td(entry.get("total_size_human", ""))) + cells.append(_td("Yes" if entry.get("on_track") else "No")) + meta = entry.get("metadata") or {} + for key in meta_keys: + cells.append(_td(meta.get(key, ""))) + return " " + "".join(cells) + "" + + +def _print_html_entries(entries: list[dict]) -> None: + """Print an HTML table of directory *entries* to stdout. + + The table uses inline CSS for compatibility with email clients. Optional + columns (Groups, Files, Total Size) are included only when at least one + entry carries that data. Any metadata fields found across all entries are + appended as additional columns. + + Args: + entries: List of directory entry dicts as returned by + :func:`_build_directory_entry`. + """ + has_groups = any("groups" in e for e in entries) + has_sizes = any("file_count" in e for e in entries) + meta_keys = _collect_meta_keys(entries) + + headers = ["Directory", "Username"] + if has_groups: + headers.append("Groups") + if has_sizes: + headers.extend(["Files", "Total Size"]) + headers.append("On Track") + headers.extend(k.capitalize() for k in meta_keys) + + print(f'') + print(" ") + print(" " + "".join(_th(h) for h in headers) + "") + print(" ") + print(" ") + for entry in entries: + print( + _entry_to_html_row( + entry, has_groups=has_groups, has_sizes=has_sizes, meta_keys=meta_keys + ) + ) + print(" ") + print("
") + + +def _print_html_report(report_data: dict) -> None: + """Print on-track statistics as two HTML tables to stdout. + + Outputs a per-track counts table followed by a per-user on-track share + table (with a total average row appended at the bottom). Both tables use + inline CSS for email-client compatibility. + + Args: + report_data: A dict as returned by :func:`compute_report`. + """ + # --- per-track table --- + per_track: dict[str | None, int] = report_data.get("per_track", {}) + named_tracks = sorted(t for t in per_track if t is not None) + + print(f'') + print(" ") + print(" " + _th("Track") + _th("Count") + "") + print(" ") + print(" ") + for track in named_tracks: + print(" " + _td(track) + _td(per_track[track]) + "") + if None in per_track: + print(" " + _td("(untracked)") + _td(per_track[None]) + "") + print(" ") + print("
") + + print() + + # --- per-user table --- + per_user: dict[str, dict] = report_data["per_user"] + avg_pct = f"{report_data['average_share'] * 100:.1f}%" + + print(f'') + print(" ") + print( + " " + + _th("Username") + + _th("On Track") + + _th("Total") + + _th("Share") + + "" + ) + print(" ") + print(" ") + for username in sorted(per_user): + stats = per_user[username] + share_pct = f"{stats['share'] * 100:.1f}%" + print( + " " + + _td(username) + + _td(stats["on_track"]) + + _td(stats["total"]) + + _td(share_pct) + + "" + ) + print( + " " + + _td("Total average") + + _td(report_data["total_on_track"]) + + _td(report_data["total"]) + + _td(avg_pct) + + "" + ) + print(" ") + print("
") + + def load_config(config_path: str) -> dict: """Load and return the YAML configuration file.""" with open(config_path, "r") as fh: @@ -717,6 +916,7 @@ def main( output: str | None = None, report: bool = False, find: str | None = None, + html_output: bool = False, ) -> None: """Run ontrack with the given options. @@ -733,6 +933,9 @@ def main( file; otherwise they are printed to stdout. find: Optional exact-match filter. Only entries containing at least one output field whose value exactly matches this string are kept. + html_output: When ``True``, render the report as an HTML table printed + to stdout instead of the default plain-text format. Ignored when + *output* is also provided (YAML to file takes precedence). """ config = load_config(config_path) paths: list[str] = config.get("paths", []) @@ -801,6 +1004,8 @@ def main( with open(output, "w") as fh: yaml.dump(report_data, fh, default_flow_style=False, allow_unicode=True) logger.info("Report written to %s", output) + elif html_output: + _print_html_report(report_data) else: print_report(report_data) elif output is not None: @@ -819,6 +1024,20 @@ def main( with open(output, "w") as fh: yaml.dump(results, fh, default_flow_style=False, allow_unicode=True) logger.info("Report written to %s", output) + elif html_output: + entries = [] + for path in iterator: + entry = _build_directory_entry( + path, + groups=groups, + light=light, + show_progress=progress, + ignore_patterns=ignore_patterns, + valid_tracks=valid_tracks, + ) + if entry is not None and _entry_matches_find(entry, find): + entries.append(entry) + _print_html_entries(entries) else: for path in iterator: entry = _build_directory_entry( @@ -876,6 +1095,8 @@ def cli() -> None: " %(prog)s --config ontrack.config --groups mygroup --light\n" " %(prog)s --config ontrack.config --groups mygroup --report\n" " %(prog)s --config ontrack.config --output report.yaml\n" + " %(prog)s --config ontrack.config --html\n" + " %(prog)s --config ontrack.config --groups mygroup --report --html\n" " %(prog)s --config ontrack.config --find alice\n" " %(prog)s --config ontrack.config --progress\n" ), @@ -937,6 +1158,16 @@ def cli() -> None: "(e.g. username, track name, Yes/No on-track status)." ), ) + parser.add_argument( + "--html", + action="store_true", + default=False, + help=( + "Render the report as an HTML table printed to stdout instead of the " + "default plain-text format. Produces email-friendly output that looks " + "correct with any font. Ignored when --output is also given." + ), + ) args = parser.parse_args() if not sys.argv[1:]: parser.print_help() @@ -950,6 +1181,7 @@ def cli() -> None: output=args.output, report=args.report, find=args.find, + html_output=args.html, ) diff --git a/tests/test_ontrack.py b/tests/test_ontrack.py index 6884925..a21bcc5 100644 --- a/tests/test_ontrack.py +++ b/tests/test_ontrack.py @@ -16,11 +16,15 @@ from ontrack import ( _build_directory_entry, + _collect_meta_keys, + _entry_to_html_row, _find_reporting_directories, _get_directory_metadata, _is_ignored, _is_on_track, _load_ontrack_yml, + _print_html_entries, + _print_html_report, _resolve_config_path, _run_du, _uid_to_username, @@ -2197,3 +2201,353 @@ def test_main_entrypoint_long_help_flag_prints_help(capsys, monkeypatch): captured = capsys.readouterr() assert "usage:" in captured.out.lower() assert "examples:" in captured.out + + +# --------------------------------------------------------------------------- +# _collect_meta_keys +# --------------------------------------------------------------------------- + + +def test_collect_meta_keys_empty(): + """_collect_meta_keys returns an empty list when no entries have metadata.""" + entries = [{"directory": "/a", "username": "alice", "on_track": False}] + assert _collect_meta_keys(entries) == [] + + +def test_collect_meta_keys_required_first(): + """Required fields appear before additional metadata keys.""" + entries = [ + {"metadata": {"track": "rna-seq", "pi": "Smith"}}, + {"metadata": {"extra": "value"}}, + ] + keys = _collect_meta_keys(entries) + assert keys[0] == "track" + assert "pi" in keys + assert "extra" in keys + + +def test_collect_meta_keys_deduplicates(): + """Each key appears only once even when present in multiple entries.""" + entries = [ + {"metadata": {"track": "a"}}, + {"metadata": {"track": "b", "pi": "Smith"}}, + ] + keys = _collect_meta_keys(entries) + assert keys.count("track") == 1 + + +# --------------------------------------------------------------------------- +# _entry_to_html_row +# --------------------------------------------------------------------------- + + +def test_entry_to_html_row_basic(): + """_entry_to_html_row produces a with expected cell values.""" + entry = { + "directory": "/data/alice", + "username": "alice", + "file_count": 42, + "total_size_human": "1.00 MB", + "on_track": True, + } + row = _entry_to_html_row(entry, has_groups=False, has_sizes=True, meta_keys=[]) + assert "" in row + assert "/data/alice" in row + assert "alice" in row + assert "42" in row + assert "1.00 MB" in row + assert "Yes" in row + + +def test_entry_to_html_row_html_escaping(): + """Cell values containing HTML special characters are properly escaped.""" + entry = { + "directory": "/data/", + "username": "alice&bob", + "on_track": False, + } + row = _entry_to_html_row(entry, has_groups=False, has_sizes=False, meta_keys=[]) + assert "<project>" in row + assert "alice&bob" in row + assert "<project>" in row + # Raw unescaped chars must not appear as tags + assert "" not in row + + +def test_entry_to_html_row_with_groups(): + """Groups column is included and joined with a comma when has_groups=True.""" + entry = { + "directory": "/data/alice", + "username": "alice", + "groups": ["labA", "labB"], + "on_track": True, + } + row = _entry_to_html_row(entry, has_groups=True, has_sizes=False, meta_keys=[]) + assert "labA, labB" in row + + +def test_entry_to_html_row_missing_groups_cell_is_empty(): + """When has_groups=True but entry has no 'groups' key, cell value is empty.""" + entry = {"directory": "/data/x", "username": "x", "on_track": False} + row = _entry_to_html_row(entry, has_groups=True, has_sizes=False, meta_keys=[]) + # Should not raise and should produce a row + assert "" in row + + +def test_entry_to_html_row_with_meta_keys(): + """Metadata columns are appended with the entry's value or empty string.""" + entry = { + "directory": "/data/p", + "username": "u", + "on_track": True, + "metadata": {"track": "rna-seq", "pi": "Smith"}, + } + row = _entry_to_html_row( + entry, has_groups=False, has_sizes=False, meta_keys=["track", "pi", "extra"] + ) + assert "rna-seq" in row + assert "Smith" in row + + +# --------------------------------------------------------------------------- +# _print_html_entries +# --------------------------------------------------------------------------- + + +def test_print_html_entries_basic(capsys, tmp_path): + """_print_html_entries emits a with thead and tbody.""" + (tmp_path / "f.txt").write_text("data") + entries = [ + { + "directory": str(tmp_path), + "username": "alice", + "file_count": 1, + "total_size_human": "4.00 B", + "on_track": False, + } + ] + _print_html_entries(entries) + out = capsys.readouterr().out + assert "" in out + assert "" in out + assert "
" in out + assert str(tmp_path) in out + assert "alice" in out + assert "1" in out + assert "4.00 B" in out + assert "No" in out + + +def test_print_html_entries_includes_groups_column_when_present(capsys): + """Groups column header and values are included when entries have groups.""" + entries = [ + { + "directory": "/d", + "username": "u", + "groups": ["labA"], + "on_track": True, + } + ] + _print_html_entries(entries) + out = capsys.readouterr().out + assert "Groups" in out + assert "labA" in out + + +def test_print_html_entries_omits_groups_column_when_absent(capsys): + """Groups column is omitted when no entry has a 'groups' key.""" + entries = [{"directory": "/d", "username": "u", "on_track": False}] + _print_html_entries(entries) + out = capsys.readouterr().out + assert "Groups" not in out + + +def test_print_html_entries_light_mode_omits_size_columns(capsys): + """Files and Total Size columns are omitted when entries have no file_count.""" + entries = [{"directory": "/d", "username": "u", "on_track": False}] + _print_html_entries(entries) + out = capsys.readouterr().out + assert "Files" not in out + assert "Total Size" not in out + + +def test_print_html_entries_html_escaping(capsys): + """Values with HTML special characters are escaped in the table output.""" + entries = [ + { + "directory": "/data/", + "username": "alice&bob", + "on_track": False, + } + ] + _print_html_entries(entries) + out = capsys.readouterr().out + assert "<project>" in out + assert "alice&bob" in out + assert "" not in out + + +def test_print_html_entries_with_metadata_columns(capsys): + """Metadata fields appear as extra columns in the HTML table.""" + entries = [ + { + "directory": "/d", + "username": "u", + "on_track": True, + "metadata": {"track": "rna-seq", "pi": "Smith"}, + } + ] + _print_html_entries(entries) + out = capsys.readouterr().out + assert "Track" in out + assert "rna-seq" in out + assert "Pi" in out + assert "Smith" in out + + +def test_print_html_entries_empty_list(capsys): + """_print_html_entries handles an empty entry list without errors.""" + _print_html_entries([]) + out = capsys.readouterr().out + assert "" in out + + +# --------------------------------------------------------------------------- +# _print_html_report +# --------------------------------------------------------------------------- + + +def test_print_html_report_basic(capsys): + """_print_html_report emits two tables with track and user data.""" + report_data = { + "per_track": {"rna-seq": 3, None: 1}, + "per_user": { + "alice": {"on_track": 2, "total": 3, "share": 2 / 3}, + "bob": {"on_track": 1, "total": 1, "share": 1.0}, + }, + "total_on_track": 3, + "total": 4, + "average_share": 0.75, + } + _print_html_report(report_data) + out = capsys.readouterr().out + + # Two tables should be present + assert out.count("") == 2 + + # Per-track table content + assert "Track" in out + assert "Count" in out + assert "rna-seq" in out + assert "(untracked)" in out + + # Per-user table content + assert "Username" in out + assert "On Track" in out + assert "alice" in out + assert "bob" in out + + # Summary row + assert "Total average" in out + assert "75.0%" in out + + +def test_print_html_report_html_escaping(capsys): + """Track names and usernames with special HTML characters are escaped.""" + report_data = { + "per_track": {"": 1}, + "per_user": { + "alice&bob": {"on_track": 1, "total": 1, "share": 1.0}, + }, + "total_on_track": 1, + "total": 1, + "average_share": 1.0, + } + _print_html_report(report_data) + out = capsys.readouterr().out + assert "<special>" in out + assert "" not in out + assert "alice&bob" in out + assert "alice&bob" not in out + + +def test_print_html_report_no_tracks(capsys): + """_print_html_report handles a report with no named tracks gracefully.""" + report_data = { + "per_track": {None: 2}, + "per_user": {"alice": {"on_track": 0, "total": 2, "share": 0.0}}, + "total_on_track": 0, + "total": 2, + "average_share": 0.0, + } + _print_html_report(report_data) + out = capsys.readouterr().out + assert "(untracked)" in out + assert "0.0%" in out + + +# --------------------------------------------------------------------------- +# main with html_output=True +# --------------------------------------------------------------------------- + + +def test_main_html_output_entries(tmp_path, capsys): + """main with html_output=True prints an HTML table of directory entries.""" + data_dir = tmp_path / "data" + data_dir.mkdir() + (data_dir / "file.txt").write_text("hello") + + config_file = tmp_path / "config.yaml" + config_file.write_text(f"paths:\n - {data_dir}\n") + + main(str(config_file), html_output=True) + out = capsys.readouterr().out + assert " Date: Fri, 8 May 2026 20:00:55 +0000 Subject: [PATCH 2/3] Address code review feedback: remove duplicate assertion, clarify escaping check, document capitalize() behaviour Agent-Logs-Url: https://github.com/FertigLab/ontrack/sessions/681c0cc2-9919-46f5-ac21-91ae05b69f37 Co-authored-by: dimalvovs <1246862+dimalvovs@users.noreply.github.com> --- ontrack.py | 2 ++ tests/test_ontrack.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ontrack.py b/ontrack.py index 7bfdbcb..5b42239 100644 --- a/ontrack.py +++ b/ontrack.py @@ -802,6 +802,8 @@ def _print_html_entries(entries: list[dict]) -> None: if has_sizes: headers.extend(["Files", "Total Size"]) headers.append("On Track") + # capitalize() matches the existing plain-text _print_directory_entry behaviour + # (first letter upper, rest lower). Metadata keys are expected to be lowercase. headers.extend(k.capitalize() for k in meta_keys) print(f'') diff --git a/tests/test_ontrack.py b/tests/test_ontrack.py index a21bcc5..360d703 100644 --- a/tests/test_ontrack.py +++ b/tests/test_ontrack.py @@ -2269,7 +2269,6 @@ def test_entry_to_html_row_html_escaping(): row = _entry_to_html_row(entry, has_groups=False, has_sizes=False, meta_keys=[]) assert "<project>" in row assert "alice&bob" in row - assert "<project>" in row # Raw unescaped chars must not appear as tags assert "" not in row @@ -2471,7 +2470,8 @@ def test_print_html_report_html_escaping(capsys): assert "<special>" in out assert "" not in out assert "alice&bob" in out - assert "alice&bob" not in out + # Verify the raw ampersand is not present unescaped (i.e. not followed by 'b' directly) + assert "&bob" not in out def test_print_html_report_no_tracks(capsys): From 985ff10f7d68c198d61079e106cd2008e9ca75ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 20:01:56 +0000 Subject: [PATCH 3/3] Strengthen escaping assertion and clarify capitalize() comment Agent-Logs-Url: https://github.com/FertigLab/ontrack/sessions/681c0cc2-9919-46f5-ac21-91ae05b69f37 Co-authored-by: dimalvovs <1246862+dimalvovs@users.noreply.github.com> --- ontrack.py | 3 ++- tests/test_ontrack.py | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ontrack.py b/ontrack.py index 5b42239..c24bd2b 100644 --- a/ontrack.py +++ b/ontrack.py @@ -803,7 +803,8 @@ def _print_html_entries(entries: list[dict]) -> None: headers.extend(["Files", "Total Size"]) headers.append("On Track") # capitalize() matches the existing plain-text _print_directory_entry behaviour - # (first letter upper, rest lower). Metadata keys are expected to be lowercase. + # (first letter upper, rest lower). This is a display convention — metadata + # keys from ontrack.yml are not validated here. headers.extend(k.capitalize() for k in meta_keys) print(f'
') diff --git a/tests/test_ontrack.py b/tests/test_ontrack.py index 360d703..5cfeeb8 100644 --- a/tests/test_ontrack.py +++ b/tests/test_ontrack.py @@ -2469,9 +2469,8 @@ def test_print_html_report_html_escaping(capsys): out = capsys.readouterr().out assert "<special>" in out assert "" not in out - assert "alice&bob" in out - # Verify the raw ampersand is not present unescaped (i.e. not followed by 'b' directly) - assert "&bob" not in out + # The properly escaped form must appear exactly once (and not the raw form). + assert out.count("alice&bob") == 1 def test_print_html_report_no_tracks(capsys):