diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py index c2b92619..35e8ad84 100644 --- a/openkb/agent/compiler.py +++ b/openkb/agent/compiler.py @@ -940,12 +940,15 @@ def _prepend_source_to_frontmatter(text: str, source_file: str) -> str: return text fm_block, body = parts - # Split the fm_block into lines for per-line manipulation. fm_block ends - # with "\n---\n"; strip the trailing closing delimiter + newline to get - # the prefix lines (opening "---" + content lines), then re-append after. - fm_prefix, _, _ = fm_block.rpartition("\n---\n") + # Strip the trailing closing delimiter to get the prefix lines (opening + # "---" + content lines), then re-append it. `frontmatter.split` leaves the + # closing at the end of fm_block as either "\n---\n" or a bare "\n---" (when + # the page ends at the delimiter with no trailing newline). Assuming only + # "\n---\n" would, for the bare form, make the strip below collapse the + # whole block and drop every existing frontmatter key. + closing = "\n---\n" if fm_block.endswith("\n---\n") else "\n---" + fm_prefix = fm_block[: -len(closing)] fm_lines = fm_prefix.split("\n") - closing = "\n---\n" for i, line in enumerate(fm_lines): if not line.lstrip().startswith("sources:"): @@ -981,9 +984,12 @@ def _remove_source_from_frontmatter(text: str, source_file: str) -> tuple[str, b return text, False fm_block, body = parts - fm_prefix, _, _ = fm_block.rpartition("\n---\n") + # See _prepend_source_to_frontmatter: the closing delimiter may be "\n---\n" + # or a bare "\n---" (no trailing newline); strip whichever is present so the + # existing frontmatter lines (and the sources: line we need) are preserved. + closing = "\n---\n" if fm_block.endswith("\n---\n") else "\n---" + fm_prefix = fm_block[: -len(closing)] fm_lines = fm_prefix.split("\n") - closing = "\n---\n" for i, line in enumerate(fm_lines): if not line.lstrip().startswith("sources:"): diff --git a/tests/test_compiler.py b/tests/test_compiler.py index dfc6d6ff..6ec387a9 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -27,11 +27,44 @@ _parse_entities_plan, _filter_entity_items, _ENTITY_TYPE_LIST, + _prepend_source_to_frontmatter, + _remove_source_from_frontmatter, remove_doc_from_entity_pages, ) from openkb.config import resolve_entity_types +class TestFrontmatterSourceMutation: + """``_prepend``/``_remove_source_from_frontmatter`` must preserve existing + frontmatter even when the page ends at the closing ``---`` with no trailing + newline — ``frontmatter.split`` then returns a block ending in a bare + ``\\n---`` rather than ``\\n---\\n``. + """ + + def test_prepend_preserves_keys_without_trailing_newline(self): + text = '---\nsources: ["summaries/p1.md"]\ntype: "Concept"\ndescription: "Focus"\n---' + out = _prepend_source_to_frontmatter(text, "summaries/p2.md") + assert out.startswith("---\n") # opening delimiter kept + assert 'type: "Concept"' in out # other keys kept + assert 'description: "Focus"' in out + assert "summaries/p1.md" in out # existing source kept + assert "summaries/p2.md" in out # new source prepended + + def test_remove_preserves_keys_without_trailing_newline(self): + text = '---\ntype: "Organization"\nsources: ["summaries/doc.md"]\n---' + out, now_empty = _remove_source_from_frontmatter(text, "summaries/doc.md") + assert now_empty is True # it was the only source + assert 'type: "Organization"' in out # other key preserved + assert "summaries/doc.md" not in out # source removed + + def test_prepend_with_body_is_unchanged(self): + text = '---\nsources: ["a.md"]\ntype: "Concept"\n---\n\nBody.\n' + out = _prepend_source_to_frontmatter(text, "b.md") + assert out.startswith("---\n") + assert "b.md" in out and "a.md" in out + assert out.endswith("\n\nBody.\n") # body + closing untouched + + class TestParseJson: def test_plain_json(self): assert _parse_json('[{"name": "foo"}]') == [{"name": "foo"}]