Skip to content

Commit b404143

Browse files
fix(config): preserve unicode characters when writing yaml config
yaml.dump() defaults to ASCII-only output, which causes `cz bump` (and `cz init`) to rewrite emoji and other non-ASCII characters in `.cz.yaml` as `\Uxxxx` escape sequences. Pass `allow_unicode=True` so the original characters round-trip.

Closes #1164

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4d99415 commit b404143

2 files changed

Lines changed: 40 additions & 2 deletions

File tree

commitizen/config/yaml_config.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,9 @@ def init_empty_config_content(self) -> None:
3030
with smart_open(
3131
self.path, "a", encoding=self._settings["encoding"]
3232
) as json_file:
33-
yaml.dump({"commitizen": {}}, json_file, explicit_start=True)
33+
yaml.dump(
34+
{"commitizen": {}}, json_file, explicit_start=True, allow_unicode=True
35+
)
3436

3537
def contains_commitizen_section(self) -> bool:
3638
with self.path.open("rb") as yaml_file:
@@ -63,6 +65,6 @@ def set_key(self, key: str, value: object) -> Self:
6365
with smart_open(
6466
self.path, "w", encoding=self._settings["encoding"]
6567
) as yaml_file:
66-
yaml.dump(config_doc, yaml_file, explicit_start=True)
68+
yaml.dump(config_doc, yaml_file, explicit_start=True, allow_unicode=True)
6769

6870
return self

tests/test_conf.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -497,3 +497,39 @@ def test_init_with_invalid_content(self, tmp_path, config_file):
497497
with pytest.raises(InvalidConfigurationError) as excinfo:
498498
YAMLConfig(data=existing_content, path=path)
499499
assert config_file in str(excinfo.value)
500+
501+
def test_set_key_preserves_unicode(self, tmp_path, config_file):
502+
"""Regression test for #1164: emoji and other non-ASCII characters
503+
must be preserved verbatim, not escaped to ``\\Uxxxx`` sequences."""
504+
path = tmp_path / "commitizen" / config_file
505+
path.parent.mkdir(parents=True, exist_ok=True)
506+
path.write_text(
507+
"commitizen:\n"
508+
' bump_message: "🚀 chore: bump $current_version to $new_version"\n',
509+
encoding="utf-8",
510+
)
511+
512+
yaml_config = YAMLConfig(data=path.read_text(encoding="utf-8"), path=path)
513+
yaml_config.set_key("version", "0.1.1")
514+
515+
rewritten = path.read_text(encoding="utf-8")
516+
assert "🚀" in rewritten
517+
assert "\\U0001F680" not in rewritten
518+
519+
def test_init_empty_config_content_passes_allow_unicode(
520+
self, tmp_path, config_file, mocker
521+
):
522+
"""``init_empty_config_content`` must call ``yaml.dump`` with
523+
``allow_unicode=True`` so that any non-ASCII default content (for
524+
future maintainers) is written verbatim. The current default
525+
(``{"commitizen": {}}``) is ASCII-only, so this asserts the
526+
keyword is passed rather than its observable behaviour."""
527+
path = tmp_path / "commitizen" / config_file
528+
path.parent.mkdir(parents=True, exist_ok=True)
529+
dump_spy = mocker.spy(yaml, "dump")
530+
531+
yaml_config = YAMLConfig(data="{}", path=path)
532+
yaml_config.init_empty_config_content()
533+
534+
dump_spy.assert_called_once()
535+
assert dump_spy.call_args.kwargs.get("allow_unicode") is True

0 commit comments

Comments
 (0)