Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions openkb/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,13 @@ def copy_relative_images(
- Missing source file: log a warning and leave the original text unchanged.
"""
result = markdown
# Track the destination chosen for each already-copied source so the same
# image referenced twice isn't duplicated, plus the set of taken names so
# two *different* sources that share a basename (e.g. ``a/logo.png`` and
# ``b/logo.png``) don't overwrite each other and collapse both links onto a
# single image.
assigned: dict[Path, str] = {}
taken: set[str] = set()
Comment on lines +231 to +232

for match in _RELATIVE_RE.finditer(markdown):
alt, rel_path = match.group(1), match.group(2)
Expand All @@ -236,10 +243,17 @@ def copy_relative_images(
)
continue

filename = src.name
dest = images_dir / filename
images_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dest)
filename = assigned.get(src)
if filename is None:
filename = src.name
n = 1
while filename in taken:
filename = f"{src.stem}_{n}{src.suffix}"
n += 1
assigned[src] = filename
taken.add(filename)
images_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, images_dir / filename)

new_ref = f"![{alt}](sources/images/{doc_name}/{filename})"
result = result.replace(match.group(0), new_ref, 1)
Expand Down
37 changes: 37 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,40 @@ def test_multiple_relative_images_all_copied(self, tmp_path):
assert "![b](sources/images/doc/b.jpg)" in result
assert (images_dir / "a.png").exists()
assert (images_dir / "b.jpg").exists()

def test_same_basename_different_dirs_no_overwrite(self, tmp_path):
    # Images that live in different folders but share a file name must each
    # survive the copy; otherwise one is lost and both markdown links end up
    # pointing at whichever file was written last.
    root = tmp_path / "source"
    for folder, payload in (("a", FAKE_PNG), ("b", FAKE_JPG)):
        subdir = root / folder
        subdir.mkdir(parents=True)
        (subdir / "logo.png").write_bytes(payload)

    out_dir = tmp_path / "images" / "doc"
    out_dir.mkdir(parents=True)

    rewritten = copy_relative_images(
        "![a](a/logo.png)\n![b](b/logo.png)", root, "doc", out_dir
    )

    # Exactly two files were written, and between them they hold both payloads.
    copied = sorted(entry.name for entry in out_dir.iterdir())
    assert len(copied) == 2  # both copied, neither overwritten
    contents = {(out_dir / name).read_bytes() for name in copied}
    assert contents == {FAKE_PNG, FAKE_JPG}

    # Pull the link target out of each rewritten ``![alt](target)`` line.
    targets = [
        line.split("](")[1].rstrip(")")
        for line in rewritten.strip().splitlines()
    ]
    targets.sort()
    assert targets[0] != targets[1]  # links point at different files

def test_same_image_referenced_twice_is_copied_once(self, tmp_path):
    # A single source image linked from two places should produce one copy,
    # with both rewritten links resolving to that same destination.
    root = tmp_path / "source"
    root.mkdir()
    (root / "logo.png").write_bytes(FAKE_PNG)
    out_dir = tmp_path / "images" / "doc"
    out_dir.mkdir(parents=True)

    rewritten = copy_relative_images(
        "![x](logo.png)\n![y](logo.png)", root, "doc", out_dir
    )

    copied_names = [entry.name for entry in out_dir.iterdir()]
    assert copied_names == ["logo.png"]
    assert rewritten.count("sources/images/doc/logo.png") == 2