11 changes: 11 additions & 0 deletions src/dlm/base_models/registry.py
@@ -337,6 +337,17 @@
size_gb_fp16=0.27,
context_length=8_192,
recommended_seq_len=1024,
capability_warning=(
"SmolLM2-135M is below dlm's empirical training floor. Audit 13 "
"follow-up findings 02 + 05 measured this base actively "
"degrading general-chat capability under every LoRA recipe "
"tested (PROSE-only, INSTRUCTION-only, mixed). Adapters "
"memorize trained content but fail to generalize and bleed "
"domain-specific tokens into unrelated queries. Suitable for "
"style-transfer demos and pipeline smoke tests; for any "
"specialty-knowledge task use a base ≥ 1B params (e.g. "
"smollm2-1.7b, qwen2.5-coder-1.5b, llama-3.2-1b)."
),
),
BaseModelSpec(
key="smollm2-360m",
8 changes: 8 additions & 0 deletions src/dlm/base_models/schema.py
@@ -149,6 +149,14 @@ class BaseModelSpec(BaseModel):
provenance_url: str | None = None
provenance_match_text: str | None = None

# Optional curated warning surfaced at `dlm train` time when this
# base is selected. Populate when the base has a known limitation
# that's not derivable from `params` / `architecture` alone — e.g.
# SmolLM2-135M's measured architectural floor (audit 13 follow-up
# findings 02 + 05: actively degrades base capability under any
# LoRA recipe). `None` means "no warning"; the empty string is
# rejected by `min_length=1` rather than treated as one.
capability_warning: str | None = Field(default=None, min_length=1)

# Modality + multi-modal preprocessing (schema v10 + v11, plus the
# additive `text-moe` discriminator).
# Text-family bases leave `modality in {"text", "text-moe"}`
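A quick sanity sketch of how the new field validates under pydantic v2 (`Spec` is a stand-in class, not the real `BaseModelSpec`):

```python
from pydantic import BaseModel, Field, ValidationError

class Spec(BaseModel):  # stand-in for BaseModelSpec
    capability_warning: str | None = Field(default=None, min_length=1)

Spec()                                  # ok: None means "no warning"
Spec(capability_warning="below floor")  # ok: printed at `dlm train` time
try:
    Spec(capability_warning="")         # min_length=1 rejects the empty string
except ValidationError:
    pass
```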
26 changes: 25 additions & 1 deletion src/dlm/cli/commands.py
@@ -853,6 +853,13 @@ def train_cmd(
"Acceptance will be persisted in the store manifest."
)
raise typer.Exit(code=1) from exc
# `getattr` so test fixtures stubbing `spec` as a `SimpleNamespace`
# without this field still pass; real registry entries always have it.
capability_warning = getattr(spec, "capability_warning", None)
if capability_warning:
console.print(
f"[yellow]warning:[/yellow] base [bold]{spec.key}[/bold]: {capability_warning}"
)
# Detect the DDP world_size set by `accelerate launch`
# (WORLD_SIZE env var) and thread it into the doctor so the plan's
# effective_batch_size reflects the rank count. Single-process
@@ -3575,28 +3582,45 @@ def cache_show_cmd(
cache = TokenizedCache.open(store.tokenized_cache_dir)
last = _queries.latest_tokenization(store.root)

# The tokenized cache is only populated for runs whose frontmatter
# declares `training.sources` (directive-sourced rows are where the
# tokenization cost dominates; in-body sections go through TRL's
# tokenizer). Surface this so an empty cache on an in-body-only doc
# doesn't look like a bug.
has_sources = parsed.frontmatter.training.sources is not None
cache_enabled = parsed.frontmatter.training.cache.enabled
if not has_sources:
cache_status: str | None = "not used (doc has no `training.sources` directive)"
elif not cache_enabled:
cache_status = "disabled (training.cache.enabled = false)"
else:
cache_status = None

payload: dict[str, object] = {
"dlm_id": parsed.frontmatter.dlm_id,
"cache_path": str(store.tokenized_cache_dir),
"entry_count": cache.entry_count,
"bytes": cache.total_bytes,
"last_run_hit_rate": last.hit_rate if last else None,
"last_run_id": last.run_id if last else None,
"cache_status": cache_status,
}
if json_out:
_sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
return

out_console.print(f"[bold]Cache for {parsed.frontmatter.dlm_id}[/bold]")
out_console.print(f" path: {store.tokenized_cache_dir}")
if cache_status is not None:
out_console.print(f" status: [yellow]{cache_status}[/yellow]")
out_console.print(f" entries: {cache.entry_count}")
out_console.print(f" size: {_human_size(cache.total_bytes)}")
if last is not None:
out_console.print(
f" last-run hit rate: {last.hit_rate:.1%} "
f"({last.cache_hits}/{last.cache_hits + last.cache_misses})"
)
else:
elif cache_status is None:
out_console.print(" last-run hit rate: [dim]no tokenization runs yet[/dim]")


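For reference, a hypothetical `--json` payload for an in-body-only doc (all values illustrative) showing where the new `cache_status` key lands:

```python
payload = {
    "dlm_id": "doc-0000",  # hypothetical id
    "cache_path": "/store/doc-0000/tokenized",
    "entry_count": 0,
    "bytes": 0,
    "last_run_hit_rate": None,
    "last_run_id": None,
    "cache_status": "not used (doc has no `training.sources` directive)",
}
```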
21 changes: 18 additions & 3 deletions src/dlm/doc/migrate.py
@@ -97,7 +97,17 @@ def migrate_file(
# Validate post-migration dict against the current schema so a bad
# migrator can't silently smear garbage into the document.
fm = DlmFrontmatter.model_validate(migrated)
new_text = _rejoin(fm, body_text)
# Preserve the user's *originally explicit* fields across migration
# by collecting their dotted paths from the post-migration dict and
# passing them to the serializer as force-emit overrides. Without
# this, a v1 doc with `lora_r: 8` (matching the current schema
# default) would silently lose the explicit pin and inherit any
# future default change. The contract that `CLAUDE.md` calls
# "additive identity" is thus honored at the *intent* level, not
# just the behavior level.
from dlm.doc.serializer import collect_dict_field_paths

force_emit = collect_dict_field_paths(migrated)
new_text = _rejoin(fm, body_text, force_emit_paths=force_emit)

if dry_run:
return MigrationResult(
@@ -152,7 +162,12 @@ def _split_for_migrate(text: str, *, path: Path) -> tuple[str, str]:
)


def _rejoin(fm: DlmFrontmatter, body_text: str) -> str:
def _rejoin(
fm: DlmFrontmatter,
body_text: str,
*,
force_emit_paths: frozenset[tuple[str, ...]] | None = None,
) -> str:
"""Re-assemble a `.dlm` file from a migrated frontmatter + raw body.

Preserves the body verbatim (migration never touches section content);
@@ -165,7 +180,7 @@ def _rejoin(fm: DlmFrontmatter, body_text: str) -> str:
# section serialization by handing an empty sections tuple and
# concatenating the raw body manually.
empty = ParsedDlm(frontmatter=fm, sections=_empty_sections())
header = serialize(empty) # always ends with "\n"
header = serialize(empty, force_emit_paths=force_emit_paths) # always ends with "\n"

# Normalize leading/trailing whitespace on the body to match the
# canonical layout: exactly one blank line between `---\n` closer
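A minimal sketch of the force-emit flow, assuming a hypothetical v1 doc whose explicit `lora_r: 8` happens to equal the current schema default:

```python
from dlm.doc.serializer import collect_dict_field_paths

migrated = {"dlm_id": "doc-0000", "training": {"lora_r": 8}}  # hypothetical
force_emit = collect_dict_field_paths(migrated)
assert ("training", "lora_r") in force_emit

# _rejoin threads this set into serialize(), so the re-emitted
# frontmatter keeps `lora_r: 8` even though it matches the default;
# a future default bump to 16 can no longer silently change the doc.
```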
97 changes: 87 additions & 10 deletions src/dlm/doc/serializer.py
@@ -34,12 +34,23 @@
)


def serialize(parsed: ParsedDlm) -> str:
def serialize(
parsed: ParsedDlm,
*,
force_emit_paths: frozenset[tuple[str, ...]] | None = None,
) -> str:
"""Produce canonical `.dlm` text for `parsed`.

Always ends with `\\n`.

`force_emit_paths` is consulted by `_emit_nested_mapping` — a field
whose dotted path appears in the set is emitted even when its value
matches the schema default. Used by the migrate pipeline to
preserve user-explicit fields across schema-default drift (so a
user who pinned `lora_r: 8` doesn't silently inherit a future
`lora_r: 16` default after migration).
"""
parts: list[str] = [_serialize_frontmatter(parsed.frontmatter), "\n"]
parts: list[str] = [_serialize_frontmatter(parsed.frontmatter, force_emit_paths), "\n"]
for i, section in enumerate(parsed.sections):
if i > 0:
parts.append("\n")
@@ -50,10 +61,41 @@ def serialize(parsed: ParsedDlm) -> str:
return rendered


def collect_dict_field_paths(d: object, prefix: tuple[str, ...] = ()) -> frozenset[tuple[str, ...]]:
"""Walk a parsed-YAML dict and return every nested leaf-or-mapping path.

Used by the migrate pipeline: the set of paths present in the
user's original frontmatter (after migration runs) is the set of
fields the serializer must emit even when they match defaults.
Mappings *and* leaves are both included so intermediate blocks
survive re-emission.
"""
paths: set[tuple[str, ...]] = set()
if isinstance(d, dict):
for k, v in d.items():
if not isinstance(k, str):
continue
here = (*prefix, k)
paths.add(here)
if isinstance(v, dict):
paths.update(collect_dict_field_paths(v, here))
elif isinstance(v, list):
# List of mappings (e.g. training.sources) — each item
# contributes paths under the same parent key, since
# we serialize positional list entries together.
for item in v:
if isinstance(item, dict):
paths.update(collect_dict_field_paths(item, here))
return frozenset(paths)
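A worked example (frontmatter content hypothetical): mappings and leaves both contribute paths, and list items contribute under the parent key:

```python
d = {"training": {"lora_r": 8, "sources": [{"path": "notes.md"}]}}
assert collect_dict_field_paths(d) == frozenset({
    ("training",),
    ("training", "lora_r"),
    ("training", "sources"),
    ("training", "sources", "path"),
})
```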


# --- frontmatter --------------------------------------------------------------


def _serialize_frontmatter(fm: DlmFrontmatter) -> str:
def _serialize_frontmatter(
fm: DlmFrontmatter,
force_emit_paths: frozenset[tuple[str, ...]] | None = None,
) -> str:
lines: list[str] = ["---"]
for key in _FRONTMATTER_ORDER:
value = getattr(fm, key, None)
@@ -63,7 +105,9 @@ def _serialize_frontmatter(fm: DlmFrontmatter) -> str:
lines.extend(_emit_block_scalar(key, value))
continue
if isinstance(value, TrainingConfig | ExportConfig):
nested = _emit_nested_mapping(value, indent=2)
nested = _emit_nested_mapping(
value, indent=2, path=(key,), force_emit_paths=force_emit_paths
)
if not nested:
# All-default nested block — skip the header too so we
# don't emit an empty `training:` line.
@@ -76,7 +120,13 @@ def _serialize_frontmatter(fm: DlmFrontmatter) -> str:
return "\n".join(lines) + "\n"


def _emit_nested_mapping(model: BaseModel, *, indent: int) -> list[str]:
def _emit_nested_mapping(
model: BaseModel,
*,
indent: int,
path: tuple[str, ...] = (),
force_emit_paths: frozenset[tuple[str, ...]] | None = None,
) -> list[str]:
"""Emit a nested training/export/dpo block.

Suppress fields that equal their schema default so
@@ -87,6 +137,11 @@ def _emit_nested_mapping(model: BaseModel, *, indent: int) -> list[str]:

Nested `BaseModel` values (e.g. `TrainingConfig.preference`)
recurse with deeper indent; all-default sub-blocks are skipped.

`force_emit_paths` overrides the default-suppression rule for any
field whose dotted path appears in the set. Used by the migrate
pipeline to preserve user-explicit fields across schema-default
drift.
"""
pad = " " * indent
lines: list[str] = []
@@ -99,16 +154,28 @@ def _emit_nested_mapping(model: BaseModel, *, indent: int) -> list[str]:

for field_name, field_info in model.__class__.model_fields.items():
value = getattr(model, field_name)
if field_info.default is not PydanticUndefined and value == field_info.default:
field_path = (*path, field_name)
forced = force_emit_paths is not None and field_path in force_emit_paths
if (
not forced
and field_info.default is not PydanticUndefined
and value == field_info.default
):
continue
if (
field_info.default is PydanticUndefined
not forced
and field_info.default is PydanticUndefined
and field_info.default_factory is not None
and value == field_info.default_factory() # type: ignore[call-arg]
):
continue
if isinstance(value, BaseModel):
nested = _emit_nested_mapping(value, indent=indent + 2)
nested = _emit_nested_mapping(
value,
indent=indent + 2,
path=field_path,
force_emit_paths=force_emit_paths,
)
if not nested:
continue
lines.append(f"{pad}{field_name}:")
@@ -125,7 +192,12 @@ def _emit_nested_mapping(model: BaseModel, *, indent: int) -> list[str]:
lines.append(f"{pad}{field_name}:")
for k, v in value.items():
lines.append(f"{pad} {k}:")
nested = _emit_nested_mapping(v, indent=indent + 4)
nested = _emit_nested_mapping(
v,
indent=indent + 4,
path=(*field_path, k),
force_emit_paths=force_emit_paths,
)
if nested:
lines.extend(nested)
else:
@@ -144,7 +216,12 @@ def _emit_nested_mapping(model: BaseModel, *, indent: int) -> list[str]:
# fields indent aligned.
lines.append(f"{pad}{field_name}:")
for item in value:
nested = _emit_nested_mapping(item, indent=indent + 4)
nested = _emit_nested_mapping(
item,
indent=indent + 4,
path=field_path,
force_emit_paths=force_emit_paths,
)
if not nested:
lines.append(f"{pad} - {{}}")
continue
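Behavior sketch for the override rule (model and field names hypothetical; assumes the scalar-emission branch not shown in this hunk emits `f"{pad}{field_name}: {value}"`):

```python
from pydantic import BaseModel

class Train(BaseModel):  # hypothetical nested block
    lora_r: int = 8

_emit_nested_mapping(Train(), indent=2)  # -> [] (all-default, suppressed)
_emit_nested_mapping(
    Train(),
    indent=2,
    path=("training",),
    force_emit_paths=frozenset({("training", "lora_r")}),
)  # -> ["  lora_r: 8"]
```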
10 changes: 8 additions & 2 deletions src/dlm/export/ollama/modelfile_shared.py
@@ -129,8 +129,14 @@ def build_param_lines(
if num_ctx is not None:
lines.append(f"PARAMETER num_ctx {num_ctx}")
if draft_model is not None:
lines.append(f"# Speculative decoding: `ollama pull {draft_model}` first.")
lines.append(f"PARAMETER draft_model {draft_model}")
# `draft_model` is not a valid Modelfile PARAMETER directive
# (Ollama rejects `ollama create` with "unknown parameter
# 'draft_model'"). It's a runtime option exposed via the
# `OLLAMA_DRAFT_MODEL` env var or the API's `options.draft_model`
# field. Document the suggested pairing as a comment so users
# can wire it up without breaking their `ollama create`.
lines.append(f"# Speculative-decoding draft: `ollama pull {draft_model}`")
lines.append(f"# then run with `OLLAMA_DRAFT_MODEL={draft_model} ollama run <this-model>`")
return lines


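With a hypothetical draft-model name, the tail appended by `build_param_lines` is now comment-only, with no `PARAMETER` directive for `ollama create` to reject:

```python
draft_model = "qwen2.5:0.5b"  # hypothetical pairing
appended = [
    f"# Speculative-decoding draft: `ollama pull {draft_model}`",
    f"# then run with `OLLAMA_DRAFT_MODEL={draft_model} ollama run <this-model>`",
]
```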
47 changes: 35 additions & 12 deletions src/dlm/export/preflight.py
@@ -98,8 +98,11 @@ def check_tokenizer_vocab(adapter_dir: Path) -> int:
detail=f"cannot parse {cfg_path}: {exc}",
) from exc

# `vocab_size` key isn't always present in tokenizer_config.json;
# fall back to the companion tokenizer.json which always carries it.
# `vocab_size` key isn't always present in tokenizer_config.json
# (Qwen2.5+, Llama-3.x omit it); fall back to summing the BPE base
# plus the explicit `added_tokens` array in tokenizer.json. This
# matches `len(transformers.AutoTokenizer.from_pretrained(...))` —
# the count the model actually addresses at inference time.
vocab_size = cfg.get("vocab_size")
if not isinstance(vocab_size, int):
tokenizer_json = adapter_dir / "tokenizer.json"
@@ -113,8 +116,9 @@
) from exc
model = t.get("model") or {}
vocab = model.get("vocab")
added = t.get("added_tokens") or []
if isinstance(vocab, dict):
vocab_size = len(vocab)
vocab_size = len(vocab) + (len(added) if isinstance(added, list) else 0)
if not isinstance(vocab_size, int) or vocab_size <= 0:
raise PreflightError(
probe="tokenizer_vocab",
@@ -133,6 +137,10 @@ def check_chat_template(adapter_dir: Path, *, required: bool = True) -> None:
`--no-template` on the CLI sets `required=False`; the default
requires one because the Modelfile emitter hardcodes
`TEMPLATE "..."` which needs source text.

Modern HF tokenizers (Qwen2.5+, Llama-3.x) write the template to
a sibling `chat_template.jinja` file rather than inlining it in
`tokenizer_config.json`. Check both locations.
"""
if not required:
return
@@ -150,15 +158,30 @@
detail=f"cannot parse {cfg_path}: {exc}",
) from exc
template = cfg.get("chat_template")
if not template or not str(template).strip():
raise PreflightError(
probe="chat_template",
detail=(
"tokenizer has no chat_template. Pass --no-template to skip "
"this check (Modelfile emission will fall back to the base "
"model's default), or attach a template via frontmatter."
),
)
if template and str(template).strip():
return

sibling_path = adapter_dir / "chat_template.jinja"
if sibling_path.exists():
try:
sibling_template = sibling_path.read_text(encoding="utf-8")
except OSError as exc:
raise PreflightError(
probe="chat_template",
detail=f"cannot read {sibling_path}: {exc}",
) from exc
if sibling_template.strip():
return

raise PreflightError(
probe="chat_template",
detail=(
"tokenizer has no chat_template (checked tokenizer_config.json "
"and chat_template.jinja). Pass --no-template to skip "
"this check (Modelfile emission will fall back to the base "
"model's default), or attach a template via frontmatter."
),
)


def check_pretokenizer_fingerprint(spec: BaseModelSpec) -> None:
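A hypothetical adapter layout that now passes the probe without `--no-template`:

```python
# adapter/
#   tokenizer_config.json   <- no "chat_template" key (Llama-3.x style)
#   chat_template.jinja     <- non-empty template found here instead
from pathlib import Path

adapter_dir = Path("adapter")     # illustrative path
check_chat_template(adapter_dir)  # returns None instead of raising
```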