Merge branch 'main' into fabric_alter_table_no_op

tobymao · web-flow · commit 2e779938ae87 · 2025-12-02T09:20:17.000-08:00
diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
@@ -115,6 +115,7 @@
     ModelTestMetadata,
     generate_test,
     run_tests,
+    filter_tests_by_patterns,
 )
 from sqlmesh.core.user import User
 from sqlmesh.utils import UniqueKeyDict, Verbosity
@@ -146,8 +147,8 @@
     from typing_extensions import Literal
 
     from sqlmesh.core.engine_adapter._typing import (
-        BigframeSession,
         DF,
+        BigframeSession,
         PySparkDataFrame,
         PySparkSession,
         SnowparkSession,
@@ -398,6 +399,10 @@ def __init__(
         self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict(
             "standaloneaudits"
         )
+        self._model_test_metadata: t.List[ModelTestMetadata] = []
+        self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {}
+        self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {}
+        self._models_with_tests: t.Set[str] = set()
         self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
         self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics")
         self._jinja_macros = JinjaMacroRegistry()
@@ -636,6 +641,10 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]:
         self._excluded_requirements.clear()
         self._linters.clear()
         self._environment_statements = []
+        self._model_test_metadata.clear()
+        self._model_test_metadata_path_index.clear()
+        self._model_test_metadata_fully_qualified_name_index.clear()
+        self._models_with_tests.clear()
 
         for loader, project in zip(self._loaders, loaded_projects):
             self._jinja_macros = self._jinja_macros.merge(project.jinja_macros)
@@ -647,6 +656,15 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]:
             self._requirements.update(project.requirements)
             self._excluded_requirements.update(project.excluded_requirements)
             self._environment_statements.extend(project.environment_statements)
+            self._model_test_metadata.extend(project.model_test_metadata)
+            for metadata in project.model_test_metadata:
+                if metadata.path not in self._model_test_metadata_path_index:
+                    self._model_test_metadata_path_index[metadata.path] = []
+                self._model_test_metadata_path_index[metadata.path].append(metadata)
+                self._model_test_metadata_fully_qualified_name_index[
+                    metadata.fully_qualified_test_name
+                ] = metadata
+                self._models_with_tests.add(metadata.model_name)
 
             config = loader.config
             self._linters[config.project] = Linter.from_rules(
@@ -1049,6 +1067,11 @@ def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]:
         """Returns all registered standalone audits in this context."""
         return MappingProxyType(self._standalone_audits)
 
+    @property
+    def models_with_tests(self) -> t.Set[str]:
+        """Returns the names of all models targeted by at least one loaded unit test."""
+        return self._models_with_tests  # live internal set rebuilt by load(), not a copy
+
     @property
     def snapshots(self) -> t.Dict[str, Snapshot]:
         """Generates and returns snapshots based on models registered in this context.
@@ -2220,7 +2243,9 @@ def test(
 
             pd.set_option("display.max_columns", None)
 
-        test_meta = self.load_model_tests(tests=tests, patterns=match_patterns)
+        test_meta = self._select_tests(
+            test_meta=self._model_test_metadata, tests=tests, patterns=match_patterns
+        )
 
         result = run_tests(
             model_test_metadata=test_meta,
@@ -2782,6 +2807,33 @@ def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter:
             raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.")
         return self.engine_adapter
 
+    def _select_tests(
+        self,
+        test_meta: t.List[ModelTestMetadata],
+        tests: t.Optional[t.List[str]] = None,
+        patterns: t.Optional[t.List[str]] = None,
+    ) -> t.List[ModelTestMetadata]:
+        """Filter pre-loaded test metadata by explicit test selectors, then by patterns."""
+
+        if tests:
+            filtered_tests = []
+            for test in tests:
+                if "::" in test:  # selector looks like a fully qualified test name
+                    if test in self._model_test_metadata_fully_qualified_name_index:
+                        filtered_tests.append(
+                            self._model_test_metadata_fully_qualified_name_index[test]
+                        )
+                else:  # any other selector is looked up as a path key
+                    test_path = Path(test)
+                    if test_path in self._model_test_metadata_path_index:
+                        filtered_tests.extend(self._model_test_metadata_path_index[test_path])
+            test_meta = filtered_tests  # input list is replaced; unmatched selectors add nothing
+
+        if patterns:
+            test_meta = filter_tests_by_patterns(test_meta, patterns)
+
+        return test_meta
+
     def _snapshots(
         self, models_override: t.Optional[UniqueKeyDict[str, Model]] = None
     ) -> t.Dict[str, Snapshot]:
diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py
@@ -394,3 +394,17 @@ def _build_table_properties_exp(
             expressions.append(clustered_by_exp)
             properties = exp.Properties(expressions=expressions)
         return properties
+
+    def _build_column_defs(
+        self,
+        target_columns_to_types: t.Dict[str, exp.DataType],
+        column_descriptions: t.Optional[t.Dict[str, str]] = None,
+        is_view: bool = False,
+    ) -> t.List[exp.ColumnDef]:
+        # Databricks requires column types to be specified when adding column comments
+        # in CREATE MATERIALIZED VIEW statements. Passing is_view=False to the base
+        # implementation is meant to force column types to be included alongside comments.
+        if is_view and column_descriptions:  # NOTE(review): not limited to materialized views
+            is_view = False
+
+        return super()._build_column_defs(target_columns_to_types, column_descriptions, is_view)
diff --git a/sqlmesh/core/linter/rules/builtin.py b/sqlmesh/core/linter/rules/builtin.py
@@ -129,6 +129,21 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]:
             return self.violation()
 
 
+class NoMissingUnitTest(Rule):
+    """All models must have a unit test found in the tests/ directory yaml files"""
+
+    def check_model(self, model: Model) -> t.Optional[RuleViolation]:
+        # External models cannot have unit tests
+        if isinstance(model, ExternalModel):
+            return None
+
+        if model.name not in self.context.models_with_tests:
+            return self.violation(
+                violation_msg=f"Model {model.name} is missing unit test(s). Please add in the tests/ directory."
+            )
+        return None
+
+
 class NoMissingExternalModels(Rule):
     """All external models must be registered in the external_models.yaml file"""
 
diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py
@@ -64,6 +64,7 @@ class LoadedProject:
     excluded_requirements: t.Set[str]
     environment_statements: t.List[EnvironmentStatements]
     user_rules: RuleSet
+    model_test_metadata: t.List[ModelTestMetadata]
 
 
 class CacheBase(abc.ABC):
@@ -243,6 +244,8 @@ def load(self) -> LoadedProject:
 
             user_rules = self._load_linting_rules()
 
+            model_test_metadata = self.load_model_tests()
+
             project = LoadedProject(
                 macros=macros,
                 jinja_macros=jinja_macros,
@@ -254,6 +257,7 @@ def load(self) -> LoadedProject:
                 excluded_requirements=excluded_requirements,
                 environment_statements=environment_statements,
                 user_rules=user_rules,
+                model_test_metadata=model_test_metadata,
             )
             return project
 
diff --git a/sqlmesh/core/test/discovery.py b/sqlmesh/core/test/discovery.py
@@ -20,6 +20,10 @@ class ModelTestMetadata(PydanticModel):
     def fully_qualified_test_name(self) -> str:
         return f"{self.path}::{self.test_name}"
 
+    @property
+    def model_name(self) -> str:
+        return self.body.get("model", "")  # "" when the test body has no "model" key
+
     def __hash__(self) -> int:
         return self.fully_qualified_test_name.__hash__()
 
diff --git a/tests/core/engine_adapter/test_databricks.py b/tests/core/engine_adapter/test_databricks.py
@@ -376,6 +376,36 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad
     ]
 
 
+def test_materialized_view_with_column_comments(
+    mocker: MockFixture, make_mocked_engine_adapter: t.Callable
+):
+    mocker.patch(
+        "sqlmesh.core.engine_adapter.databricks.DatabricksEngineAdapter.set_current_catalog"
+    )
+    adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog")
+    mocker.patch.object(adapter, "get_current_catalog", return_value="test_catalog")
+
+    adapter.create_view(
+        "test_view",
+        parse_one("SELECT a, b FROM source_table"),
+        target_columns_to_types={
+            "a": exp.DataType.build("INT"),
+            "b": exp.DataType.build("STRING"),
+        },
+        materialized=True,
+        column_descriptions={
+            "a": "column a description",
+            "b": "column b description",
+        },
+    )
+
+    sql_calls = to_sql_calls(adapter)
+    # Each column must carry its type next to its COMMENT: Databricks requires this in materialized views
+    assert sql_calls == [
+        "CREATE OR REPLACE MATERIALIZED VIEW `test_view` (`a` INT COMMENT 'column a description', `b` STRING COMMENT 'column b description') AS SELECT `a`, `b` FROM `source_table`",
+    ]
+
+
 def test_create_table_clustered_by(mocker: MockFixture, make_mocked_engine_adapter: t.Callable):
     mocker.patch(
         "sqlmesh.core.engine_adapter.databricks.DatabricksEngineAdapter.set_current_catalog"
diff --git a/tests/core/linter/test_builtin.py b/tests/core/linter/test_builtin.py
@@ -172,3 +172,63 @@ def test_no_missing_external_models_with_existing_file_not_ending_in_newline(
     )
     fix_path = sushi_path / "external_models.yaml"
     assert edit.path == fix_path
+
+
+def test_no_missing_unit_tests(tmp_path, copy_to_temp_path):
+    """
+    Tests that the NoMissingUnitTest linter rule correctly identifies models
+    without corresponding unit tests in the tests/ directory
+
+    This test checks the sushi example project, enables the linter,
+    and verifies that the linter raises a rule violation for the models
+    that do not have a unit test
+    """
+    sushi_paths = copy_to_temp_path("examples/sushi")
+    sushi_path = sushi_paths[0]
+
+    # Override the config.py to turn on lint
+    with open(sushi_path / "config.py", "r", encoding="utf-8") as f:
+        read_file = f.read()
+
+    before = """    linter=LinterConfig(
+        enabled=False,
+        rules=[
+            "ambiguousorinvalidcolumn",
+            "invalidselectstarexpansion",
+            "noselectstar",
+            "nomissingaudits",
+            "nomissingowner",
+            "nomissingexternalmodels",
+        ],
+    ),"""
+    after = """linter=LinterConfig(enabled=True, rules=["nomissingunittest"]),"""
+    read_file = read_file.replace(before, after)
+    assert after in read_file
+    with open(sushi_path / "config.py", "w", encoding="utf-8") as f:
+        f.write(read_file)
+
+    # Load the context with the temporary sushi path
+    context = Context(paths=[sushi_path])
+
+    # Lint the models
+    lints = context.lint_models(raise_on_error=False)
+
+    # Should have at least one violation, raised for a model that has no unit test
+    assert len(lints) >= 1
+
+    # Check that we get violations for models without tests
+    violation_messages = [lint.violation_msg for lint in lints]
+    assert any("is missing unit test(s)" in msg for msg in violation_messages)
+
+    # Check that models with existing tests don't have violations
+    models_with_tests = ["customer_revenue_by_day", "customer_revenue_lifetime", "order_items"]
+
+    for model_name in models_with_tests:
+        model_violations = [
+            lint
+            for lint in lints
+            if model_name in lint.violation_msg and "is missing unit test(s)" in lint.violation_msg
+        ]
+        assert len(model_violations) == 0, (
+            f"Model {model_name} should not have a violation since it has a test"
+        )
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
@@ -1539,6 +1539,9 @@ def test_gateway(copy_to_temp_path: t.Callable, mocker: MockerFixture) -> None:
     with open(test_path, "w", encoding="utf-8") as file:
         dump_yaml(test_dict, file)
 
+    # Re-initialize context to pick up the modified test file
+    context = Context(paths=path, config=config)
+
     spy_execute = mocker.spy(EngineAdapter, "_execute")
     mocker.patch("sqlmesh.core.test.definition.random_id", return_value="jzngz56a")
 
@@ -2448,6 +2451,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None:
         copy_test_file(original_test_file, tmp_path / "tests" / f"test_success_{i}.yaml", i)
         copy_test_file(new_test_file, tmp_path / "tests" / f"test_failure_{i}.yaml", i)
 
+    # Re-initialize context to pick up the new test files
+    context = Context(paths=tmp_path, config=config)
+
     with capture_output() as captured_output:
         context.test()
 
@@ -2463,13 +2469,12 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None:
         "SELECT 1 AS col_1, 2 AS col_2, 3 AS col_3, 4 AS col_4, 5 AS col_5, 6 AS col_6, 7 AS col_7"
     )
 
-    context.upsert_model(
-        _create_model(
-            meta="MODEL(name test.test_wide_model)",
-            query=wide_model_query,
-            default_catalog=context.default_catalog,
-        )
+    wide_model = _create_model(
+        meta="MODEL(name test.test_wide_model)",
+        query=wide_model_query,
+        default_catalog=context.default_catalog,
     )
+    context.upsert_model(wide_model)
 
     tests_dir = tmp_path / "tests"
     tests_dir.mkdir()
@@ -2493,6 +2498,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None:
 
     wide_test_file.write_text(wide_test_file_content)
 
+    context.load()
+    context.upsert_model(wide_model)
+
     with capture_output() as captured_output:
         context.test()
 
@@ -2549,6 +2557,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None:
         """
     )
 
+    # Re-initialize context to pick up the modified test file
+    context = Context(paths=tmp_path, config=config)
+
     with capture_output() as captured_output:
         context.test()
 
@@ -3472,6 +3483,9 @@ def test_cte_failure(tmp_path: Path) -> None:
     """
     )
 
+    # Re-initialize context to pick up the new test file
+    context = Context(paths=tmp_path, config=config)
+
     with capture_output() as captured_output:
         context.test()
 
@@ -3498,6 +3512,9 @@ def test_cte_failure(tmp_path: Path) -> None:
     """
     )
 
+    # Re-initialize context to pick up the modified test file
+    context = Context(paths=tmp_path, config=config)
+
     with capture_output() as captured_output:
         context.test()