From d5e6d5f1197732f0569d077ba9f16c74800f4d17 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Sun, 5 Apr 2026 08:36:40 +0800 Subject: [PATCH 1/4] fix: correct toml integration frontmatter handling --- src/specify_cli/integrations/base.py | 131 +++++++++++++----- .../test_integration_base_toml.py | 48 +++++++ 2 files changed, 141 insertions(+), 38 deletions(-) diff --git a/src/specify_cli/integrations/base.py b/src/specify_cli/integrations/base.py index dac5063f5..5d96ccf0c 100644 --- a/src/specify_cli/integrations/base.py +++ b/src/specify_cli/integrations/base.py @@ -532,23 +532,98 @@ def command_filename(self, template_name: str) -> str: def _extract_description(content: str) -> str: """Extract the ``description`` value from YAML frontmatter. - Scans lines between the first pair of ``---`` delimiters for a - top-level ``description:`` key. Returns the value (with - surrounding quotes stripped) or an empty string if not found. + Parses the YAML frontmatter so block scalar descriptions (``|`` + and ``>``) keep their YAML semantics instead of being treated as + raw text. """ - in_frontmatter = False - for line in content.splitlines(): - stripped = line.rstrip("\n\r") - if stripped == "---": - if not in_frontmatter: - in_frontmatter = True - continue - break # second --- - if in_frontmatter and stripped.startswith("description:"): - _, _, value = stripped.partition(":") - return value.strip().strip('"').strip("'") + import yaml + + if not content.startswith("---"): + return "" + + lines = content.splitlines(keepends=True) + if not lines or lines[0].strip() != "---": + return "" + + frontmatter_end = -1 + for i, line in enumerate(lines[1:], start=1): + if line.strip() == "---": + frontmatter_end = i + break + + if frontmatter_end == -1: + return "" + + frontmatter_text = "".join(lines[1:frontmatter_end]) + try: + frontmatter = yaml.safe_load(frontmatter_text) or {} + except yaml.YAMLError: + return "" + + if not isinstance(frontmatter, dict): + return "" + + description = frontmatter.get("description", "") + if isinstance(description, str): + return description return "" + @staticmethod + def _split_frontmatter(content: str) -> tuple[str, str]: + """Split YAML frontmatter from the remaining content. + + Returns ``("", content)`` when no complete frontmatter block is + present. The body is preserved exactly as written so prompt text + keeps its intended formatting. + """ + if not content.startswith("---"): + return "", content + + lines = content.splitlines(keepends=True) + if not lines or lines[0].strip() != "---": + return "", content + + frontmatter_end = -1 + for i, line in enumerate(lines[1:], start=1): + if line.strip() == "---": + frontmatter_end = i + break + + if frontmatter_end == -1: + return "", content + + frontmatter = "".join(lines[1:frontmatter_end]) + body = "".join(lines[frontmatter_end + 1 :]) + return frontmatter, body + + @staticmethod + def _render_toml_string(value: str) -> str: + """Render *value* as a TOML string literal. + + Uses a basic string for single-line values, multiline basic + strings for values containing newlines, and falls back to a + literal string or escaped basic string when delimiters appear in + the content. + """ + if "\n" not in value and "\r" not in value: + escaped = value.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + + multiline_value = value.rstrip("\n") + escaped = multiline_value.replace("\\", "\\\\") + if '"""' not in escaped: + return '"""\n' + escaped + '\n"""' + if "'''" not in multiline_value: + return "'''\n" + multiline_value + "\n'''" + + return '"' + ( + multiline_value.replace("\\", "\\\\") + .replace('"', '\\"') + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t") + ) + '"' + @staticmethod def _render_toml(description: str, body: str) -> str: """Render a TOML command file from description and body. @@ -565,32 +640,11 @@ def _render_toml(description: str, body: str) -> str: toml_lines: list[str] = [] if description: - desc = description.replace('"', '\\"') - toml_lines.append(f'description = "{desc}"') + toml_lines.append(f"description = {TomlIntegration._render_toml_string(description)}") toml_lines.append("") body = body.rstrip("\n") - - # Escape backslashes for basic multiline strings. - escaped = body.replace("\\", "\\\\") - - if '"""' not in escaped: - toml_lines.append('prompt = """') - toml_lines.append(escaped) - toml_lines.append('"""') - elif "'''" not in body: - toml_lines.append("prompt = '''") - toml_lines.append(body) - toml_lines.append("'''") - else: - escaped_body = ( - body.replace("\\", "\\\\") - .replace('"', '\\"') - .replace("\n", "\\n") - .replace("\r", "\\r") - .replace("\t", "\\t") - ) - toml_lines.append(f'prompt = "{escaped_body}"') + toml_lines.append(f"prompt = {TomlIntegration._render_toml_string(body)}") return "\n".join(toml_lines) + "\n" @@ -630,7 +684,8 @@ def setup( raw = src_file.read_text(encoding="utf-8") description = self._extract_description(raw) processed = self.process_template(raw, self.key, script_type, arg_placeholder) - toml_content = self._render_toml(description, processed) + _, body = self._split_frontmatter(processed) + toml_content = self._render_toml(description, body) dst_name = self.command_filename(src_file.stem) dst_file = self.write_file_and_record( toml_content, dest / dst_name, project_root, manifest diff --git a/tests/integrations/test_integration_base_toml.py b/tests/integrations/test_integration_base_toml.py index 8b0935290..e12dfb836 100644 --- a/tests/integrations/test_integration_base_toml.py +++ b/tests/integrations/test_integration_base_toml.py @@ -9,6 +9,9 @@ """ import os +import tomllib + +import pytest from specify_cli.integrations import INTEGRATION_REGISTRY, get_integration from specify_cli.integrations.base import TomlIntegration @@ -132,6 +135,51 @@ def test_toml_uses_correct_arg_placeholder(self, tmp_path): has_args = any("{{args}}" in f.read_text(encoding="utf-8") for f in cmd_files) assert has_args, "No TOML command file contains {{args}} placeholder" + @pytest.mark.parametrize( + ("frontmatter", "expected"), + [ + ( + "---\ndescription: |\n First line\n Second line\n---\nBody\n", + "First line\nSecond line\n", + ), + ( + "---\ndescription: >\n First line\n Second line\n---\nBody\n", + "First line Second line\n", + ), + ], + ) + def test_toml_extract_description_supports_block_scalars(self, frontmatter, expected): + assert TomlIntegration._extract_description(frontmatter) == expected + + def test_toml_prompt_excludes_frontmatter(self, tmp_path, monkeypatch): + i = get_integration(self.KEY) + template = tmp_path / "sample.md" + template.write_text( + "---\n" + "description: Summary line one\n" + "scripts:\n" + " sh: scripts/bash/example.sh\n" + "---\n" + "Body line one\n" + "Body line two\n", + encoding="utf-8", + ) + monkeypatch.setattr(i, "list_command_templates", lambda: [template]) + + m = IntegrationManifest(self.KEY, tmp_path) + created = i.setup(tmp_path, m) + cmd_files = [f for f in created if "scripts" not in f.parts] + assert len(cmd_files) == 1 + + generated = cmd_files[0].read_text(encoding="utf-8") + parsed = tomllib.loads(generated) + + assert parsed["description"] == "Summary line one" + assert parsed["prompt"] == "Body line one\nBody line two\n" + assert "description:" not in parsed["prompt"] + assert "scripts:" not in parsed["prompt"] + assert "---" not in parsed["prompt"] + def test_toml_is_valid(self, tmp_path): """Every generated TOML file must parse without errors.""" try: From 81694439969b7bca26ec3dc1c172e7f728f73ddd Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Sun, 5 Apr 2026 09:40:29 +0800 Subject: [PATCH 2/4] refactor: reuse frontmatter split in toml integration --- src/specify_cli/integrations/base.py | 18 ++---------------- .../integrations/test_integration_base_toml.py | 5 ----- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/src/specify_cli/integrations/base.py b/src/specify_cli/integrations/base.py index 5d96ccf0c..f869fad9b 100644 --- a/src/specify_cli/integrations/base.py +++ b/src/specify_cli/integrations/base.py @@ -538,23 +538,9 @@ def _extract_description(content: str) -> str: """ import yaml - if not content.startswith("---"): - return "" - - lines = content.splitlines(keepends=True) - if not lines or lines[0].strip() != "---": + frontmatter_text, _ = TomlIntegration._split_frontmatter(content) + if not frontmatter_text: return "" - - frontmatter_end = -1 - for i, line in enumerate(lines[1:], start=1): - if line.strip() == "---": - frontmatter_end = i - break - - if frontmatter_end == -1: - return "" - - frontmatter_text = "".join(lines[1:frontmatter_end]) try: frontmatter = yaml.safe_load(frontmatter_text) or {} except yaml.YAMLError: diff --git a/tests/integrations/test_integration_base_toml.py b/tests/integrations/test_integration_base_toml.py index e12dfb836..902b10de1 100644 --- a/tests/integrations/test_integration_base_toml.py +++ b/tests/integrations/test_integration_base_toml.py @@ -182,11 +182,6 @@ def test_toml_prompt_excludes_frontmatter(self, tmp_path, monkeypatch): def test_toml_is_valid(self, tmp_path): """Every generated TOML file must parse without errors.""" - try: - import tomllib - except ModuleNotFoundError: - import tomli as tomllib # type: ignore[no-redef] - i = get_integration(self.KEY) m = IntegrationManifest(self.KEY, tmp_path) created = i.setup(tmp_path, m) From e942d06cca941c341bb94a61c8e3a1a918f71983 Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Sun, 5 Apr 2026 10:39:33 +0800 Subject: [PATCH 3/4] fix: preserve toml integration string semantics --- src/specify_cli/integrations/base.py | 15 +++++------ .../test_integration_base_toml.py | 26 ++++++++++++++++++- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/specify_cli/integrations/base.py b/src/specify_cli/integrations/base.py index f869fad9b..09ad06718 100644 --- a/src/specify_cli/integrations/base.py +++ b/src/specify_cli/integrations/base.py @@ -566,12 +566,12 @@ def _split_frontmatter(content: str) -> tuple[str, str]: return "", content lines = content.splitlines(keepends=True) - if not lines or lines[0].strip() != "---": + if not lines or lines[0].rstrip("\r\n") != "---": return "", content frontmatter_end = -1 for i, line in enumerate(lines[1:], start=1): - if line.strip() == "---": + if line.rstrip("\r\n") == "---": frontmatter_end = i break @@ -595,15 +595,14 @@ def _render_toml_string(value: str) -> str: escaped = value.replace("\\", "\\\\").replace('"', '\\"') return f'"{escaped}"' - multiline_value = value.rstrip("\n") - escaped = multiline_value.replace("\\", "\\\\") + escaped = value.replace("\\", "\\\\") if '"""' not in escaped: - return '"""\n' + escaped + '\n"""' - if "'''" not in multiline_value: - return "'''\n" + multiline_value + "\n'''" + return '"""\n' + escaped + '"""' + if "'''" not in value: + return "'''\n" + value + "'''" return '"' + ( - multiline_value.replace("\\", "\\\\") + value.replace("\\", "\\\\") .replace('"', '\\"') .replace("\n", "\\n") .replace("\r", "\\r") diff --git a/tests/integrations/test_integration_base_toml.py b/tests/integrations/test_integration_base_toml.py index 902b10de1..2582a9a85 100644 --- a/tests/integrations/test_integration_base_toml.py +++ b/tests/integrations/test_integration_base_toml.py @@ -146,11 +146,35 @@ def test_toml_uses_correct_arg_placeholder(self, tmp_path): "---\ndescription: >\n First line\n Second line\n---\nBody\n", "First line Second line\n", ), + ( + "---\ndescription: |-\n First line\n Second line\n---\nBody\n", + "First line\nSecond line", + ), + ( + "---\ndescription: >-\n First line\n Second line\n---\nBody\n", + "First line Second line", + ), ], ) def test_toml_extract_description_supports_block_scalars(self, frontmatter, expected): assert TomlIntegration._extract_description(frontmatter) == expected + def test_split_frontmatter_ignores_indented_delimiters(self): + content = ( + "---\n" + "description: |\n" + " line one\n" + " ---\n" + " line two\n" + "---\n" + "Body\n" + ) + + frontmatter, body = TomlIntegration._split_frontmatter(content) + + assert "line two" in frontmatter + assert body == "Body\n" + def test_toml_prompt_excludes_frontmatter(self, tmp_path, monkeypatch): i = get_integration(self.KEY) template = tmp_path / "sample.md" @@ -175,7 +199,7 @@ def test_toml_prompt_excludes_frontmatter(self, tmp_path, monkeypatch): parsed = tomllib.loads(generated) assert parsed["description"] == "Summary line one" - assert parsed["prompt"] == "Body line one\nBody line two\n" + assert parsed["prompt"] == "Body line one\nBody line two" assert "description:" not in parsed["prompt"] assert "scripts:" not in parsed["prompt"] assert "---" not in parsed["prompt"] From b70ece31d0438f320b74f963224f20308be4d72e Mon Sep 17 00:00:00 2001 From: rbbtsn0w Date: Sun, 5 Apr 2026 19:04:21 +0800 Subject: [PATCH 4/4] docs: align toml integration renderer docstring --- src/specify_cli/integrations/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/integrations/base.py b/src/specify_cli/integrations/base.py index 09ad06718..0722b9a91 100644 --- a/src/specify_cli/integrations/base.py +++ b/src/specify_cli/integrations/base.py @@ -618,9 +618,10 @@ def _render_toml(description: str, body: str) -> str: to multiline literal strings (``'''``) if the body contains ``\"\"\"``, then to an escaped basic string as a last resort. - The body is rstrip'd so the closing delimiter appears on the line - immediately after the last content line — matching the release - script's ``echo "$body"; echo '\"\"\"'`` pattern. + The body is ``rstrip("\\n")``'d before rendering, so the TOML + value preserves content without forcing a trailing newline. As a + result, multiline delimiters appear on their own line only when + the rendered value itself ends with a newline. """ toml_lines: list[str] = []