Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sqlmesh/core/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -2488,6 +2488,7 @@ def create_external_models(self, strict: bool = False) -> None:
gateway=external_models_gateway,
max_workers=self.concurrent_tasks,
strict=strict,
all_models=self._models,
)

@python_api_analytics
Expand Down
11 changes: 9 additions & 2 deletions sqlmesh/core/schema_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,33 @@ def create_external_models_file(
gateway: t.Optional[str] = None,
max_workers: int = 1,
strict: bool = False,
all_models: t.Optional[t.Dict[str, Model]] = None,
) -> None:
"""Create or replace a YAML file with column and types of all columns in all external models.

Args:
path: The path to store the YAML file.
models: FQN to model
models: FQN to model for the current repo/config being processed.
adapter: The engine adapter.
state_reader: The state reader.
dialect: The dialect to serialize the schema as.
gateway: If the model should be associated with a specific gateway; the gateway key
max_workers: The max concurrent workers to fetch columns.
strict: If True, raise an error if the external model is missing in the database.
all_models: FQN to model across all loaded repos. When provided, a dependency is only
classified as external if it is absent from this full set. This prevents cross-repo
internal models from being misclassified as external in multi-repo setups.
"""
known_models: t.Union[UniqueKeyDict[str, Model], t.Dict[str, Model]] = (
all_models if all_models is not None else models
)
external_model_fqns = set()

for fqn, model in models.items():
if model.kind.is_external:
external_model_fqns.add(fqn)
for dep in model.depends_on:
if dep not in models:
if dep not in known_models:
external_model_fqns.add(dep)

# Make sure we don't convert internal models into external ones.
Expand Down
56 changes: 56 additions & 0 deletions tests/core/integration/test_multi_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)
from sqlmesh.core.console import get_console
from sqlmesh.core.context import Context
from sqlmesh.utils import yaml
from sqlmesh.utils.date import now
from tests.conftest import DuckDBMetadata
from tests.utils.test_helpers import use_terminal_console
Expand Down Expand Up @@ -559,3 +560,58 @@ def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path):
gathered_gateways = context.engine_adapters.keys()
expected_gateways = {"local", "memory", "extra"}
assert gathered_gateways == expected_gateways


@use_terminal_console
def test_multi_repo_create_external_models(copy_to_temp_path):
"""create_external_models should not classify cross-repo models as external (sqlmesh#5326).

silver.c and silver.e (repo_2) depend on bronze.a (repo_1). When running
create_external_models with both repos loaded, bronze.a must NOT be treated
as an external model because it is an internal model defined in repo_1.

The observable symptom of the bug is a warning: "Unable to get schema for
'bronze.a'" — because SQLMesh tries to query the schema of what it wrongly
thinks is an external table. A correct implementation never attempts this
lookup and therefore emits no such warning.
"""
paths = copy_to_temp_path("examples/multi")
repo_1_path = f"{paths[0]}/repo_1"
repo_2_path = f"{paths[0]}/repo_2"

context = Context(paths=[repo_1_path, repo_2_path], gateway="memory")
context._new_state_sync().reset(default_catalog=context.default_catalog)

with patch.object(context.console, "log_warning") as mock_warning:
context.create_external_models()

warning_messages = [str(call) for call in mock_warning.call_args_list]
schema_lookup_warnings = [
msg for msg in warning_messages if "bronze" in msg and "a" in msg and "schema" in msg.lower()
]
assert not schema_lookup_warnings, (
"bronze.a should not be looked up as an external model, but got warnings: "
+ str(schema_lookup_warnings)
)

# repo_2's external_models.yaml must not contain bronze.a
repo_2_external = Path(repo_2_path) / c.EXTERNAL_MODELS_YAML
if repo_2_external.exists():
contents = yaml.load(repo_2_external)
external_names = [e["name"] for e in contents]
assert not any("bronze" in name and "a" in name for name in external_names), (
f"bronze.a should not be in repo_2's external models, but found: {external_names}"
)

# repo_1 has no external dependencies at all
repo_1_external = Path(repo_1_path) / c.EXTERNAL_MODELS_YAML
if repo_1_external.exists():
contents = yaml.load(repo_1_external)
assert len(contents) == 0, (
f"repo_1 should have no external models, got: {contents}"
)

# Plan should still resolve all 5 models as internal after create_external_models
context.load()
plan = context.plan_builder().build()
assert len(plan.new_snapshots) == 5