Edit on GitHub

sqlmesh.core.test.definition

View Source

   1from __future__ import annotations
   2
   3import sys
   4
   5import datetime
   6import threading
   7import typing as t
   8import unittest
   9from collections import Counter
  10from contextlib import nullcontext, contextmanager, AbstractContextManager
  11from itertools import chain
  12from pathlib import Path
  13from unittest.mock import patch
  14
  15
  16from io import StringIO
  17from sqlglot import Dialect, exp
  18from sqlglot.optimizer.annotate_types import annotate_types
  19from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
  20
  21from sqlmesh.core import constants as c
  22from sqlmesh.core.dialect import normalize_model_name, schema_
  23from sqlmesh.core.engine_adapter import EngineAdapter
  24from sqlmesh.core.macros import RuntimeStage
  25from sqlmesh.core.model import Model, PythonModel, SqlModel
  26from sqlmesh.utils import UniqueKeyDict, random_id, type_is_known, yaml
  27from sqlmesh.utils.date import date_dict, pandas_timestamp_to_pydatetime, to_datetime
  28from sqlmesh.utils.errors import ConfigError, TestError
  29from sqlmesh.utils.yaml import load as yaml_load
  30from sqlmesh.utils import Verbosity
  31from sqlmesh.utils.rich import df_to_table
  32
  33if t.TYPE_CHECKING:
  34    import pandas as pd
  35
  36    from sqlglot.dialects.dialect import DialectType
  37
  38    Row = t.Dict[str, t.Any]
  39
  40
  41TIME_KWARG_KEYS = {
  42    "start",
  43    "end",
  44    "execution_time",
  45    "latest",
  46    # all built-in datetime macro var names
  47    *date_dict(execution_time="1970-01-01", start="1970-01-01", end="1970-01-01").keys(),
  48}
  49
  50
  51class ModelTest(unittest.TestCase):
  52    __test__ = False
  53
  54    CONCURRENT_RENDER_LOCK = threading.Lock()
  55
  56    def __init__(
  57        self,
  58        body: t.Dict[str, t.Any],
  59        test_name: str,
  60        model: Model,
  61        models: UniqueKeyDict[str, Model],
  62        engine_adapter: EngineAdapter,
  63        dialect: str | None = None,
  64        path: Path | None = None,
  65        preserve_fixtures: bool = False,
  66        default_catalog: str | None = None,
  67        concurrency: bool = False,
  68        verbosity: Verbosity = Verbosity.DEFAULT,
  69    ) -> None:
  70        """ModelTest encapsulates a unit test for a model.
  71
  72        Args:
  73            body: A dictionary that contains test metadata like inputs and outputs.
  74            test_name: The name of the test.
  75            model: The model that is being tested.
  76            models: All models to use for expansion and mapping of physical locations.
  77            engine_adapter: The engine adapter to use.
  78            dialect: The models' dialect, used for normalization purposes.
  79            path: An optional path to the test definition yaml file.
  80            preserve_fixtures: Preserve the fixture tables in the testing database, useful for debugging.
  81        """
  82        self.body = body
  83        self.test_name = test_name
  84        self.model = model
  85        self.models = models
  86        self.engine_adapter = engine_adapter
  87        self.path = path
  88        self.preserve_fixtures = preserve_fixtures
  89        self.default_catalog = default_catalog
  90        self.dialect = dialect
  91        self.concurrency = concurrency
  92        self.verbosity = verbosity
  93
  94        self._fixture_table_cache: t.Dict[str, exp.Table] = {}
  95        self._normalized_column_name_cache: t.Dict[str, str] = {}
  96        self._normalized_model_name_cache: t.Dict[t.Tuple[str, bool], str] = {}
  97
  98        self._test_adapter_dialect = Dialect.get_or_raise(self.engine_adapter.dialect)
  99
 100        self._validate_and_normalize_test()
 101
 102        if self.engine_adapter.default_catalog:
 103            self._fixture_catalog: t.Optional[exp.Identifier] = normalize_identifiers(
 104                exp.parse_identifier(
 105                    self.engine_adapter.default_catalog, dialect=self._test_adapter_dialect
 106                ),
 107                dialect=self._test_adapter_dialect,
 108            )
 109        else:
 110            self._fixture_catalog = None
 111
 112        # The test schema name is randomized to avoid concurrency issues,
 113        # unless a schema is provided in the unit tests's body
 114        self._fixture_schema = exp.parse_identifier(
 115            self.body.get("schema") or f"sqlmesh_test_{random_id(short=True)}"
 116        )
 117        self._qualified_fixture_schema = schema_(self._fixture_schema, self._fixture_catalog)
 118
 119        self._transforms = self._test_adapter_dialect.generator_class.TRANSFORMS
 120        self._execution_time = str(self.body.get("vars", {}).get("execution_time") or "")
 121
 122        if self._execution_time:
 123            # Normalizes the execution time by converting it into UTC timezone
 124            self._execution_time = str(to_datetime(self._execution_time))
 125
 126        # When execution_time is set, we mock the CURRENT_* SQL expressions so they always return it
 127        if self._execution_time:
 128            exec_time = exp.Literal.string(self._execution_time)
 129            self._transforms = {
 130                **self._transforms,
 131                exp.CurrentDate: lambda self, _: self.sql(
 132                    exp.cast(exec_time, "date", dialect=dialect)
 133                ),
 134                exp.CurrentDatetime: lambda self, _: self.sql(
 135                    exp.cast(exec_time, "datetime", dialect=dialect)
 136                ),
 137                exp.CurrentTime: lambda self, _: self.sql(
 138                    exp.cast(exec_time, "time", dialect=dialect)
 139                ),
 140                exp.CurrentTimestamp: lambda self, _: self.sql(
 141                    exp.cast(exec_time, "timestamp", dialect=dialect)
 142                ),
 143            }
 144
 145        super().__init__()
 146
 147    def defaultTestResult(self) -> unittest.TestResult:
 148        from sqlmesh.core.test.result import ModelTextTestResult
 149
 150        return ModelTextTestResult(stream=sys.stdout, descriptions=True, verbosity=self.verbosity)
 151
 152    def shortDescription(self) -> t.Optional[str]:
 153        return self.body.get("description")
 154
 155    def setUp(self) -> None:
 156        """Load all input tables"""
 157        import pandas as pd
 158        import numpy as np
 159
 160        self.engine_adapter.create_schema(self._qualified_fixture_schema)
 161
 162        for name, values in self.body.get("inputs", {}).items():
 163            all_types_are_known = False
 164            columns_to_known_types: t.Dict[str, exp.DataType] = {}
 165
 166            model = self.models.get(name)
 167            if model:
 168                inferred_columns_to_types = model.columns_to_types or {}
 169                columns_to_known_types = {
 170                    c: t for c, t in inferred_columns_to_types.items() if type_is_known(t)
 171                }
 172                all_types_are_known = bool(inferred_columns_to_types) and (
 173                    len(columns_to_known_types) == len(inferred_columns_to_types)
 174                )
 175
 176            # Types specified in the test will override the corresponding inferred ones
 177            columns_to_known_types.update(values.get("columns", {}))
 178
 179            rows = values.get("rows")
 180            if not all_types_are_known and rows:
 181                for col, value in rows[0].items():
 182                    if col not in columns_to_known_types:
 183                        v_type = annotate_types(exp.convert(value)).type or type(value).__name__
 184                        v_type = exp.maybe_parse(
 185                            v_type, into=exp.DataType, dialect=self._test_adapter_dialect
 186                        )
 187
 188                        if not type_is_known(v_type):
 189                            _raise_error(
 190                                f"Failed to infer the data type of column '{col}' for '{name}'. This issue can be "
 191                                "mitigated by casting the column in the model definition, setting its type in "
 192                                "external_models.yaml if it's an external model, setting the model's 'columns' property, "
 193                                "or setting its 'columns' mapping in the test itself",
 194                                self.path,
 195                            )
 196
 197                        columns_to_known_types[col] = v_type
 198
 199            if rows is None:
 200                query_or_df: exp.Query | pd.DataFrame = self._add_missing_columns(
 201                    values["query"], columns_to_known_types
 202                )
 203                if columns_to_known_types:
 204                    columns_to_known_types = {
 205                        col: columns_to_known_types[col] for col in query_or_df.named_selects
 206                    }
 207            else:
 208                query_or_df = self._create_df(values, columns=columns_to_known_types)
 209
 210            # Convert NaN/NaT values to None if DataFrame
 211            if isinstance(query_or_df, pd.DataFrame):
 212                query_or_df = query_or_df.replace({np.nan: None})
 213
 214            self.engine_adapter.create_view(
 215                self._test_fixture_table(name), query_or_df, columns_to_known_types
 216            )
 217
 218    def tearDown(self) -> None:
 219        """Drop all fixture tables."""
 220        if not self.preserve_fixtures:
 221            self.engine_adapter.drop_schema(self._qualified_fixture_schema, cascade=True)
 222
 223    def assert_equal(
 224        self,
 225        expected: pd.DataFrame,
 226        actual: pd.DataFrame,
 227        sort: bool,
 228        partial: t.Optional[bool] = False,
 229    ) -> None:
 230        """Compare two DataFrames"""
 231        import numpy as np
 232        import pandas as pd
 233        from pandas.api.types import is_object_dtype
 234
 235        if partial:
 236            intersection = actual[actual.columns.intersection(expected.columns)]
 237            if len(intersection.columns) > 0:
 238                actual = intersection
 239
 240        # Two astypes are necessary, pandas converts strings to times as NS,
 241        # but if the actual is US, it doesn't take effect until the 2nd try!
 242        actual_types = actual.dtypes.to_dict()
 243        expected = expected.astype(actual_types, errors="ignore").astype(
 244            actual_types, errors="ignore"
 245        )
 246
 247        # The `actual` df's dtypes will almost always be pd.Timestamp for datetime values,
 248        # but in some scenarios (e.g., DuckDB >=0.10.2) it will be a pandas `object` type
 249        # containing python `datetime.xxx` values.
 250        #
 251        # Pandas `object` columns result in a noop for the `astype` call above. Because any
 252        # quoted YAML value is a string, we must manually convert the `expected` df string
 253        # values to the correct `datetime.xxx` type.
 254        #
 255        # We determine the type from a single sentinel value, but since the `actual` df is
 256        # coming from a database query, it is safe to assume that the column contains only
 257        # a single type.
 258        object_sentinel_values = {
 259            col: actual[col][0]
 260            for col in actual_types
 261            if is_object_dtype(actual_types[col]) and len(actual[col]) != 0
 262        }
 263        for col, value in object_sentinel_values.items():
 264            try:
 265                # can't use `isinstance()` here - https://stackoverflow.com/a/68743663/1707525
 266                if type(value) is datetime.date:
 267                    expected[col] = pd.to_datetime(expected[col]).dt.date
 268                elif type(value) is datetime.time:
 269                    expected[col] = pd.to_datetime(expected[col]).dt.time
 270                elif type(value) is datetime.datetime:
 271                    expected[col] = pd.to_datetime(expected[col]).dt.to_pydatetime()
 272            except Exception as e:
 273                from sqlmesh.core.console import get_console
 274
 275                get_console().log_warning(
 276                    f"Failed to convert expected value for {col} into `datetime` "
 277                    f"for unit test '{str(self)}'. {str(e)}."
 278                )
 279
 280        actual = actual.replace({np.nan: None})
 281        expected = expected.replace({np.nan: None})
 282
 283        # We define this here to avoid a top-level import of numpy and pandas
 284        DATETIME_TYPES = (
 285            datetime.datetime,
 286            datetime.date,
 287            datetime.time,
 288            np.datetime64,
 289            pd.Timestamp,
 290        )
 291
 292        def _to_hashable(x: t.Any) -> t.Any:
 293            if isinstance(x, (list, np.ndarray)):
 294                return tuple(_to_hashable(v) for v in x)
 295            if isinstance(x, dict):
 296                return tuple((k, _to_hashable(v)) for k, v in x.items())
 297            return str(x) if isinstance(x, DATETIME_TYPES) or not isinstance(x, t.Hashable) else x
 298
 299        actual = actual.apply(lambda col: col.map(_to_hashable))
 300        expected = expected.apply(lambda col: col.map(_to_hashable))
 301
 302        if sort:
 303            actual = actual.sort_values(by=actual.columns.to_list()).reset_index(drop=True)
 304            expected = expected.sort_values(by=expected.columns.to_list()).reset_index(drop=True)
 305
 306        try:
 307            pd.testing.assert_frame_equal(
 308                expected,
 309                actual,
 310                check_dtype=False,
 311                check_like=True,  # Ignore column order
 312            )
 313        except AssertionError as e:
 314            # There are 2 concepts at play here:
 315            # 1. The Exception args will contain the error message plus the diff dataframe table stringified
 316            #    (backwards compatibility with existing tests, possible to serialize/send over network etc)
 317            # 2. Each test will also transform these diff dataframes into Rich tables, which will be the ones that'll
 318            #    be surfaced to the user through Console for better UX (versus stringified dataframes)
 319            #
 320            # This is a bit of a hack, but it's a way to get the best of both worlds.
 321            args: t.List[t.Any] = []
 322
 323            failed_subtest = ""
 324
 325            if subtest := getattr(self, "_subtest", None):
 326                if cte := subtest.params.get("cte"):
 327                    failed_subtest = f" (CTE {cte})"
 328
 329            if expected.shape != actual.shape:
 330                _raise_if_unexpected_columns(expected.columns, actual.columns)
 331
 332                args.append("Data mismatch (rows are different)")
 333
 334                missing_rows = _row_difference(expected, actual)
 335                if not missing_rows.empty:
 336                    args[0] += f"\n\nMissing rows:\n\n{missing_rows}"
 337                    args.append(df_to_table(f"Missing rows{failed_subtest}", missing_rows))
 338
 339                unexpected_rows = _row_difference(actual, expected)
 340
 341                if not unexpected_rows.empty:
 342                    args[0] += f"\n\nUnexpected rows:\n\n{unexpected_rows}"
 343                    args.append(df_to_table(f"Unexpected rows{failed_subtest}", unexpected_rows))
 344
 345            else:
 346                diff = expected.compare(actual).rename(columns={"self": "exp", "other": "act"})
 347
 348                args.append(f"Data mismatch (exp: expected, act: actual)\n\n{diff}")
 349
 350                diff.rename(columns={"exp": "Expected", "act": "Actual"}, inplace=True)
 351                if self.verbosity == Verbosity.DEFAULT:
 352                    args.extend(
 353                        df_to_table(f"Data mismatch{failed_subtest}", df)
 354                        for df in _split_df_by_column_pairs(diff)
 355                    )
 356                else:
 357                    from pandas import DataFrame, MultiIndex
 358
 359                    levels = t.cast(MultiIndex, diff.columns).levels[0]
 360                    for col in levels:
 361                        # diff[col] returns a DataFrame when columns is a MultiIndex
 362                        col_diff = t.cast(DataFrame, diff[col])
 363                        if not col_diff.empty:
 364                            table = df_to_table(
 365                                f"[bold red]Column '{col}' mismatch{failed_subtest}[/bold red]",
 366                                col_diff,
 367                            )
 368                            args.append(table)
 369
 370            e.args = (*args,)
 371
 372            raise e
 373
 374    def runTest(self) -> None:
 375        raise NotImplementedError
 376
 377    def path_relative_to(self, other: Path) -> Path | None:
 378        """Compute a version of this test's path relative to the `other` path"""
 379        return self.path.relative_to(other) if self.path else None
 380
 381    @staticmethod
 382    def create_test(
 383        body: t.Dict[str, t.Any],
 384        test_name: str,
 385        models: UniqueKeyDict[str, Model],
 386        engine_adapter: EngineAdapter,
 387        dialect: str | None,
 388        path: Path | None,
 389        preserve_fixtures: bool = False,
 390        default_catalog: str | None = None,
 391        concurrency: bool = False,
 392        verbosity: Verbosity = Verbosity.DEFAULT,
 393    ) -> t.Optional[ModelTest]:
 394        """Create a SqlModelTest or a PythonModelTest.
 395
 396        Args:
 397            body: A dictionary that contains test metadata like inputs and outputs.
 398            test_name: The name of the test.
 399            models: All models to use for expansion and mapping of physical locations.
 400            engine_adapter: The engine adapter to use.
 401            dialect: The models' dialect, used for normalization purposes.
 402            path: An optional path to the test definition yaml file.
 403            preserve_fixtures: Preserve the fixture tables in the testing database, useful for debugging.
 404        """
 405        name = body.get("model")
 406        if name is None:
 407            _raise_error("Missing required 'model' field", path)
 408
 409        name = normalize_model_name(name, default_catalog=default_catalog, dialect=dialect)
 410        model = models.get(name)
 411        if not model:
 412            from sqlmesh.core.console import get_console
 413
 414            get_console().log_warning(
 415                f"Model '{name}' was not found{' at ' + str(path) if path else ''}"
 416            )
 417            return None
 418
 419        if isinstance(model, SqlModel):
 420            test_type: t.Type[ModelTest] = SqlModelTest
 421        elif isinstance(model, PythonModel):
 422            test_type = PythonModelTest
 423        else:
 424            _raise_error(f"Model '{name}' is an unsupported model type for testing", path)
 425
 426        try:
 427            return test_type(
 428                body,
 429                test_name,
 430                t.cast(Model, model),
 431                models,
 432                engine_adapter,
 433                dialect,
 434                path,
 435                preserve_fixtures,
 436                default_catalog,
 437                concurrency,
 438                verbosity,
 439            )
 440        except Exception as e:
 441            raise TestError(f"Failed to create test {test_name} ({path})\n{str(e)}")
 442
 443    def __str__(self) -> str:
 444        return f"{self.test_name} ({self.path})"
 445
 446    def _validate_and_normalize_test(self) -> None:
 447        inputs = self.body.get("inputs")
 448        outputs = self.body.get("outputs", {})
 449
 450        if not outputs:
 451            _raise_error("Incomplete test, missing outputs", self.path)
 452
 453        ctes = outputs.get("ctes")
 454        query = outputs.get("query")
 455        partial = outputs.pop("partial", None)
 456
 457        if ctes is None and query is None:
 458            _raise_error("Incomplete test, outputs must contain 'query' or 'ctes'", self.path)
 459
 460        def _normalize_rows(
 461            values: t.List[Row] | t.Dict,
 462            name: str,
 463            partial: bool = False,
 464            dialect: DialectType = None,
 465        ) -> t.Dict:
 466            import pandas as pd
 467
 468            if not isinstance(values, dict):
 469                values = {"rows": values}
 470
 471            rows = values.get("rows")
 472            query = values.get("query")
 473
 474            fmt = values.get("format")
 475            path = values.get("path")
 476            if fmt == "csv":
 477                csv_settings = values.get("csv_settings") or {}
 478                rows = pd.read_csv(path or StringIO(rows), **csv_settings).to_dict(orient="records")
 479            elif fmt in (None, "yaml"):
 480                if path:
 481                    input_rows = yaml_load(Path(path))
 482                    rows = input_rows.get("rows") if isinstance(input_rows, dict) else input_rows
 483            else:
 484                _raise_error(f"Unsupported data format '{fmt}' for '{name}'", self.path)
 485
 486            if query is not None:
 487                if rows is not None:
 488                    _raise_error(
 489                        f"Invalid test, cannot set both 'query' and 'rows' for '{name}'", self.path
 490                    )
 491
 492                # We parse the user-supplied query using the testing adapter dialect, but we
 493                # normalize its identifiers according to the model's dialect, so that, e.g.,
 494                # the projection names match those in its `columns_to_types` field
 495                values["query"] = normalize_identifiers(
 496                    exp.maybe_parse(query, dialect=self._test_adapter_dialect), dialect=dialect
 497                )
 498                return values
 499
 500            if rows is None:
 501                _raise_error(f"Incomplete test, missing row data for '{name}'", self.path)
 502
 503            assert isinstance(rows, list)
 504            values["rows"] = [
 505                {self._normalize_column_name(column): value for column, value in row.items()}
 506                for row in rows
 507            ]
 508            if partial:
 509                values["partial"] = True
 510
 511            return values
 512
 513        def _normalize_sources(
 514            sources: t.Dict, partial: bool = False, with_default_catalog: bool = True
 515        ) -> t.Dict:
 516            normalized_sources = {}
 517            for name, values in sources.items():
 518                normalized_name = self._normalize_model_name(
 519                    name, with_default_catalog=with_default_catalog
 520                )
 521                model = self.models.get(normalized_name)
 522                dialect = model.dialect if model else self.dialect
 523
 524                normalized_sources[normalized_name] = _normalize_rows(
 525                    values, name, partial=partial, dialect=dialect
 526                )
 527
 528            return normalized_sources
 529
 530        normalized_model_name = self._normalize_model_name(self.body["model"])
 531        self.body["model"] = normalized_model_name
 532
 533        if inputs:
 534            inputs = _normalize_sources(inputs)
 535            for name, values in inputs.items():
 536                columns = values.get("columns")
 537                if columns is None:
 538                    continue
 539
 540                if not isinstance(columns, dict):
 541                    _raise_error(
 542                        f"Invalid 'columns' value for model '{name}', expected a mapping name -> type",
 543                        self.path,
 544                    )
 545
 546                values["columns"] = {
 547                    self._normalize_column_name(c): exp.DataType.build(
 548                        t, dialect=self._test_adapter_dialect
 549                    )
 550                    for c, t in columns.items()
 551                }
 552
 553            for depends_on in self.model.depends_on:
 554                if depends_on not in inputs:
 555                    _raise_error(f"Incomplete test, missing input model '{depends_on}'", self.path)
 556
 557            if self.model.depends_on_self and normalized_model_name not in inputs:
 558                inputs[normalized_model_name] = {"rows": []}
 559
 560            self.body["inputs"] = inputs
 561
 562        if ctes:
 563            outputs["ctes"] = _normalize_sources(ctes, partial=partial, with_default_catalog=False)
 564
 565        if query or query == []:
 566            outputs["query"] = _normalize_rows(
 567                query, self.model.name, partial=partial, dialect=self.model.dialect
 568            )
 569
 570    def _test_fixture_table(self, name: str) -> exp.Table:
 571        table = self._fixture_table_cache.get(name)
 572        if not table:
 573            table = exp.to_table(name, dialect=self._test_adapter_dialect)
 574
 575            # We change the table path below, so this ensures there are no name clashes
 576            table.this.set("this", "__".join(part.name for part in table.parts))
 577
 578            table.set("db", self._fixture_schema.copy())
 579            if self._fixture_catalog:
 580                table.set("catalog", self._fixture_catalog.copy())
 581
 582            self._fixture_table_cache[name] = table
 583
 584        return table
 585
 586    def _normalize_model_name(self, name: str, with_default_catalog: bool = True) -> str:
 587        normalized_name = self._normalized_model_name_cache.get((name, with_default_catalog))
 588        if normalized_name is None:
 589            default_catalog = self.default_catalog if with_default_catalog else None
 590            normalized_name = normalize_model_name(
 591                name, default_catalog=default_catalog, dialect=self.dialect
 592            )
 593            self._normalized_model_name_cache[(name, with_default_catalog)] = normalized_name
 594
 595        return normalized_name
 596
 597    def _normalize_column_name(self, name: str) -> str:
 598        normalized_name = self._normalized_column_name_cache.get(name)
 599        if normalized_name is None:
 600            normalized_name = normalize_identifiers(name, dialect=self.dialect).name
 601            self._normalized_column_name_cache[name] = normalized_name
 602
 603        return normalized_name
 604
 605    @contextmanager
 606    def _concurrent_render_context(self) -> t.Iterator[None]:
 607        """
 608        Context manager that ensures that the tests are executed safely in a concurrent environment.
 609        This is needed in case `execution_time` is set, as we'd then have to:
 610        - Freeze time through `time_machine` (not thread safe)
 611        - Globally patch the SQLGlot dialect so that any date/time nodes are evaluated at the `execution_time` during generation
 612        """
 613        import time_machine
 614        from sqlglot.generator import _DISPATCH_CACHE
 615
 616        lock_ctx: AbstractContextManager = (
 617            self.CONCURRENT_RENDER_LOCK if self.concurrency else nullcontext()
 618        )
 619        time_ctx: AbstractContextManager = nullcontext()
 620        dialect_patch_ctx: AbstractContextManager = nullcontext()
 621        dispatch_patch_ctx: AbstractContextManager = nullcontext()
 622
 623        if self._execution_time:
 624            generator_class = self._test_adapter_dialect.generator_class
 625            time_ctx = time_machine.travel(self._execution_time, tick=False)
 626            dialect_patch_ctx = patch.dict(generator_class.TRANSFORMS, self._transforms)
 627
 628            # sqlglot caches a dispatch table per generator class, so we need to patch
 629            # it as well to ensure the overridden transforms are actually used
 630            dispatch = _DISPATCH_CACHE.get(generator_class)
 631            if dispatch is not None:
 632                dispatch_patch_ctx = patch.dict(dispatch, self._transforms)
 633
 634        with lock_ctx, time_ctx, dialect_patch_ctx, dispatch_patch_ctx:
 635            yield
 636
 637    def _execute(self, query: exp.Query | str) -> pd.DataFrame:
 638        """Executes the given query using the testing engine adapter and returns a DataFrame."""
 639        return self.engine_adapter.fetchdf(query)
 640
 641    def _create_df(
 642        self,
 643        values: t.Dict[str, t.Any],
 644        columns: t.Optional[t.Collection] = None,
 645        partial: t.Optional[bool] = False,
 646    ) -> pd.DataFrame:
 647        import pandas as pd
 648
 649        query = values.get("query")
 650        if query:
 651            if not partial:
 652                query = self._add_missing_columns(query, columns)
 653
 654            return self._execute(query)
 655
 656        rows = values["rows"]
 657        columns_str: t.Optional[t.List[str]] = None
 658        if columns:
 659            columns_str = [str(c) for c in columns]
 660            referenced_columns = list(dict.fromkeys(col for row in rows for col in row))
 661            _raise_if_unexpected_columns(columns, referenced_columns)
 662
 663            if partial:
 664                columns_str = [c for c in columns_str if c in referenced_columns]
 665
 666        return pd.DataFrame.from_records(rows, columns=columns_str)
 667
 668    def _add_missing_columns(
 669        self, query: exp.Query, all_columns: t.Optional[t.Collection[str]] = None
 670    ) -> exp.Query:
 671        if not all_columns or query.is_star:
 672            return query
 673
 674        query_columns = set(query.named_selects)
 675        missing_columns = [col for col in all_columns if col not in query_columns]
 676        if missing_columns:
 677            query.select(*[exp.null().as_(col) for col in missing_columns], copy=False)
 678
 679        return query
 680
 681
 682class SqlModelTest(ModelTest):
 683    def test_ctes(self, ctes: t.Dict[str, exp.Expr], recursive: bool = False) -> None:
 684        """Run CTE queries and compare output to expected output"""
 685        for cte_name, values in self.body["outputs"].get("ctes", {}).items():
 686            with self.subTest(cte=cte_name):
 687                if cte_name not in ctes:
 688                    _raise_error(
 689                        f"No CTE named {cte_name} found in model {self.model.name}", self.path
 690                    )
 691
 692                cte_query = ctes[cte_name].this
 693
 694                sort = cte_query.args.get("order") is None
 695                partial = values.get("partial")
 696
 697                cte_query = exp.select(*_projection_identifiers(cte_query)).from_(cte_name)
 698                for alias, cte in ctes.items():
 699                    cte_query = cte_query.with_(alias, cte.this, recursive=recursive)
 700
 701                with self._concurrent_render_context():
 702                    # Similar to the model's query, we render the CTE query under the locked context
 703                    # so that the execution (fetchdf) can continue concurrently between the threads
 704                    sql = cte_query.sql(
 705                        self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql
 706                    )
 707
 708                actual = self._execute(sql)
 709                expected = self._create_df(values, columns=cte_query.named_selects, partial=partial)
 710
 711                self.assert_equal(expected, actual, sort=sort, partial=partial)
 712
 713    def runTest(self) -> None:
 714        with self._concurrent_render_context():
 715            # Render the model's query and generate the SQL under the locked context so that
 716            # execution (fetchdf) can continue concurrently between the threads
 717            query = self._render_model_query()
 718            sql = query.sql(self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql)
 719
 720        with_clause = query.args.get("with_")
 721
 722        if with_clause:
 723            self.test_ctes(
 724                {
 725                    self._normalize_model_name(cte.alias, with_default_catalog=False): cte
 726                    for cte in query.ctes
 727                },
 728                recursive=with_clause.recursive,
 729            )
 730
 731        values = self.body["outputs"].get("query")
 732        if values is not None:
 733            partial = values.get("partial")
 734            sort = query.args.get("order") is None
 735
 736            actual = self._execute(sql)
 737            expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)
 738
 739            self.assert_equal(expected, actual, sort=sort, partial=partial)
 740
 741    def _render_model_query(self) -> exp.Query:
 742        variables = self.body.get("vars", {}).copy()
 743        time_kwargs = {key: variables.pop(key) for key in TIME_KWARG_KEYS if key in variables}
 744
 745        query = self.model.render_query_or_raise(
 746            **time_kwargs,
 747            variables=variables,
 748            engine_adapter=self.engine_adapter,
 749            table_mapping={
 750                name: self._test_fixture_table(name).sql() for name in self.body.get("inputs", {})
 751            },
 752            runtime_stage=RuntimeStage.TESTING,
 753        )
 754        return query
 755
 756
 757class PythonModelTest(ModelTest):
 758    def __init__(
 759        self,
 760        body: t.Dict[str, t.Any],
 761        test_name: str,
 762        model: Model,
 763        models: UniqueKeyDict[str, Model],
 764        engine_adapter: EngineAdapter,
 765        dialect: str | None = None,
 766        path: Path | None = None,
 767        preserve_fixtures: bool = False,
 768        default_catalog: str | None = None,
 769        concurrency: bool = False,
 770        verbosity: Verbosity = Verbosity.DEFAULT,
 771    ) -> None:
 772        """PythonModelTest encapsulates a unit test for a Python model.
 773
 774        Args:
 775            body: A dictionary that contains test metadata like inputs and outputs.
 776            test_name: The name of the test.
 777            model: The Python model that is being tested.
 778            models: All models to use for expansion and mapping of physical locations.
 779            engine_adapter: The engine adapter to use.
 780            dialect: The models' dialect, used for normalization purposes.
 781            path: An optional path to the test definition yaml file.
 782            preserve_fixtures: Preserve the fixture tables in the testing database, useful for debugging.
 783        """
 784        from sqlmesh.core.test.context import TestExecutionContext
 785
 786        super().__init__(
 787            body,
 788            test_name,
 789            model,
 790            models,
 791            engine_adapter,
 792            dialect,
 793            path,
 794            preserve_fixtures,
 795            default_catalog,
 796            concurrency,
 797            verbosity,
 798        )
 799
 800        self.context = TestExecutionContext(
 801            engine_adapter=engine_adapter,
 802            models=models,
 803            test=self,
 804            default_dialect=dialect,
 805            default_catalog=default_catalog,
 806        )
 807
 808    def runTest(self) -> None:
 809        values = self.body["outputs"].get("query")
 810        if values is not None:
 811            partial = values.get("partial")
 812
 813            actual_df = self._execute_model()
 814            actual_df.reset_index(drop=True, inplace=True)
 815            expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)
 816
 817            self.assert_equal(expected, actual_df, sort=True, partial=partial)
 818
 819    def _execute_model(self) -> pd.DataFrame:
 820        """Executes the python model and returns a DataFrame."""
 821        import pandas as pd
 822
 823        with self._concurrent_render_context():
 824            variables = self.body.get("vars", {}).copy()
 825            time_kwargs = {key: variables.pop(key) for key in TIME_KWARG_KEYS if key in variables}
 826            df = next(self.model.render(context=self.context, variables=variables, **time_kwargs))
 827
 828        assert not isinstance(df, exp.Expr)
 829        return df if isinstance(df, pd.DataFrame) else df.toPandas()
 830
 831
 832def generate_test(
 833    model: Model,
 834    input_queries: t.Dict[str, str],
 835    models: UniqueKeyDict[str, Model],
 836    engine_adapter: EngineAdapter,
 837    test_engine_adapter: EngineAdapter,
 838    project_path: Path,
 839    overwrite: bool = False,
 840    variables: t.Optional[t.Dict[str, str]] = None,
 841    path: t.Optional[str] = None,
 842    name: t.Optional[str] = None,
 843    include_ctes: bool = False,
 844) -> None:
 845    """Generate a unit test fixture for a given model.
 846
 847    Args:
 848        model: The model to test.
 849        input_queries: Mapping of model names to queries. Each model included in this mapping
 850            will be populated in the test based on the results of the corresponding query.
 851        models: The context's models.
 852        engine_adapter: The target engine adapter.
 853        test_engine_adapter: The test engine adapter.
 854        project_path: The path pointing to the project's root directory.
 855        overwrite: Whether to overwrite the existing test in case of a file path collision.
 856            When set to False, an error will be raised if there is such a collision.
 857        variables: Key-value pairs that will define variables needed by the model.
 858        path: The file path corresponding to the fixture, relative to the test directory.
 859            By default, the fixture will be created under the test directory and the file name
 860            will be inferred from the test's name.
 861        name: The name of the test. This is inferred from the model name by default.
 862        include_ctes: When true, CTE fixtures will also be generated.
 863    """
 864    import numpy as np
 865
 866    test_name = name or f"test_{model.view_name}"
 867    path = path or f"{test_name}.yaml"
 868
 869    extension = path.split(".")[-1].lower()
 870    if extension not in ("yaml", "yml"):
 871        path = f"{path}.yaml"
 872
 873    fixture_path = project_path / c.TESTS / path
 874    if not overwrite and fixture_path.exists():
 875        raise ConfigError(
 876            f"Fixture '{fixture_path}' already exists, make sure to set --overwrite if it can be safely overwritten."
 877        )
 878
 879    # ruamel.yaml does not support pandas Timestamps, so we must convert them to python
 880    # datetime or datetime.date objects based on column type
 881    inputs = {
 882        dep: pandas_timestamp_to_pydatetime(
 883            engine_adapter.fetchdf(query).apply(lambda col: col.map(_normalize_df_value)),
 884            models[dep].columns_to_types,
 885        )
 886        .replace({np.nan: None})
 887        .to_dict(orient="records")
 888        for dep, query in input_queries.items()
 889    }
 890    outputs: t.Dict[str, t.Any] = {"query": {}}
 891    variables = variables or {}
 892    test_body = {"model": model.fqn, "inputs": inputs, "outputs": outputs}
 893
 894    if variables:
 895        test_body["vars"] = variables
 896
 897    test = ModelTest.create_test(
 898        body=test_body.copy(),
 899        test_name=test_name,
 900        models=models,
 901        engine_adapter=test_engine_adapter,
 902        dialect=model.dialect,
 903        path=fixture_path,
 904        default_catalog=model.default_catalog,
 905    )
 906    if not test:
 907        return
 908
 909    test.setUp()
 910
 911    if isinstance(model, SqlModel):
 912        assert isinstance(test, SqlModelTest)
 913        model_query = test._render_model_query()
 914        with_clause = model_query.args.get("with_")
 915
 916        if with_clause and include_ctes:
 917            ctes = {}
 918            recursive = with_clause.recursive
 919            previous_ctes: t.List[exp.CTE] = []
 920
 921            for cte in model_query.ctes:
 922                cte_query = cte.this
 923                cte_identifier = cte.args["alias"].this
 924
 925                cte_query = exp.select(*_projection_identifiers(cte_query)).from_(cte_identifier)
 926
 927                for prev in chain(previous_ctes, [cte]):
 928                    cte_query = cte_query.with_(
 929                        prev.args["alias"].this, prev.this, recursive=recursive
 930                    )
 931
 932                cte_output = test._execute(cte_query)
 933                ctes[cte.alias] = (
 934                    pandas_timestamp_to_pydatetime(
 935                        df=cte_output.apply(lambda col: col.map(_normalize_df_value)),
 936                    )
 937                    .replace({np.nan: None})
 938                    .to_dict(orient="records")
 939                )
 940
 941                previous_ctes.append(cte)
 942
 943            if ctes:
 944                outputs["ctes"] = ctes
 945
 946        output = test._execute(model_query)
 947    else:
 948        output = t.cast(PythonModelTest, test)._execute_model()
 949
 950    outputs["query"] = (
 951        pandas_timestamp_to_pydatetime(
 952            output.apply(lambda col: col.map(_normalize_df_value)), model.columns_to_types
 953        )
 954        .replace({np.nan: None})
 955        .to_dict(orient="records")
 956    )
 957
 958    test.tearDown()
 959
 960    fixture_path.parent.mkdir(exist_ok=True, parents=True)
 961    with open(fixture_path, "w", encoding="utf-8") as file:
 962        yaml.dump({test_name: test_body}, file)
 963
 964
 965def _projection_identifiers(query: exp.Query) -> t.List[str | exp.Identifier]:
 966    identifiers: t.List[str | exp.Identifier] = []
 967    for select in query.selects:
 968        if isinstance(select, exp.Alias):
 969            identifiers.append(select.args["alias"])
 970        elif isinstance(select, exp.Column):
 971            identifiers.append(select.this)
 972        else:
 973            identifiers.append(select.output_name)
 974
 975    return identifiers
 976
 977
 978def _raise_if_unexpected_columns(
 979    expected_cols: t.Collection[str], actual_cols: t.Collection[str]
 980) -> None:
 981    unique_expected_cols = set(expected_cols)
 982    unknown_cols = [col for col in actual_cols if col not in unique_expected_cols]
 983
 984    if unknown_cols:
 985        expected = f"Expected column(s): {', '.join(list(expected_cols))}\n"
 986        unknown = f"Unknown column(s): {', '.join(unknown_cols)}"
 987        _raise_error(f"Detected unknown column(s)\n\n{expected}{unknown}")
 988
 989
 990def _row_difference(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
 991    """Returns all rows in `left` that don't appear in `right`."""
 992    import numpy as np
 993    import pandas as pd
 994
 995    rows_missing_from_right = []
 996
 997    # `None` replaces `np.nan` because `np.nan != np.nan` and this would affect the mapping lookup
 998    right_row_count: t.MutableMapping[t.Tuple, int] = Counter(
 999        right.replace({np.nan: None}).itertuples(index=False, name=None)
1000    )
1001    for left_row in left.replace({np.nan: None}).itertuples(index=False):
1002        left_row_tuple = tuple(left_row)
1003        if right_row_count[left_row_tuple] <= 0:
1004            rows_missing_from_right.append(left_row)
1005        else:
1006            right_row_count[left_row_tuple] -= 1
1007
1008    return pd.DataFrame(rows_missing_from_right)
1009
1010
def _raise_error(msg: str, path: Path | None = None) -> None:
    """Raise a `TestError` carrying `msg`, mentioning the test's `path` when one is given."""
    header = f"Failed to run test at {path}" if path else "Failed to run test"
    raise TestError(f"{header}:\n{msg}")
1015
1016
1017def _normalize_df_value(value: t.Any) -> t.Any:
1018    """Normalize data in a pandas dataframe so ruamel and sqlglot can deal with it."""
1019    import numpy as np
1020
1021    if isinstance(value, (list, np.ndarray)):
1022        return [_normalize_df_value(v) for v in value]
1023    if isinstance(value, dict):
1024        if "key" in value and "value" in value:
1025            # Maps returned by DuckDB look like: {'key': ['key1', 'key2'], 'value': [10, 20]}
1026            # so we convert to {'key1': 10, 'key2': 20} (TODO: handle more dialects here)
1027            return {k: _normalize_df_value(v) for k, v in zip(value["key"], value["value"])}
1028        return {k: _normalize_df_value(v) for k, v in value.items()}
1029    return value
1030
1031
1032def _split_df_by_column_pairs(df: pd.DataFrame, pairs_per_chunk: int = 4) -> t.List[pd.DataFrame]:
1033    """Split a dataframe into chunks of column pairs.
1034
1035    Args:
1036        df: The dataframe to split
1037        pairs_per_chunk: Number of column pairs per chunk (default: 4)
1038
1039    Returns:
1040        List of dataframes, each containing an even number of columns
1041    """
1042    total_columns = len(df.columns)
1043
1044    # If we have fewer columns than pairs_per_chunk * 2, return the original df
1045    if total_columns <= pairs_per_chunk * 2:
1046        return [df]
1047
1048    # Calculate number of chunks needed to split columns evenly
1049    num_chunks = (total_columns + (pairs_per_chunk * 2 - 1)) // (pairs_per_chunk * 2)
1050
1051    # Calculate columns per chunk to ensure equal distribution
1052    # We round down to nearest even number to ensure each chunk has even columns
1053    columns_per_chunk = (total_columns // num_chunks) & ~1  # Round down to nearest even number
1054    remainder = total_columns - (columns_per_chunk * num_chunks)
1055
1056    chunks = []
1057    start_idx = 0
1058
1059    # Distribute columns evenly across chunks
1060    for i in range(num_chunks):
1061        # Add 2 columns to early chunks if there's a remainder
1062        # This ensures we always add pairs of columns
1063        extra = 2 if i < remainder // 2 else 0
1064        end_idx = start_idx + columns_per_chunk + extra
1065        chunk = df.iloc[:, start_idx:end_idx]
1066        chunks.append(chunk)
1067        start_idx = end_idx
1068
1069    return chunks

# Variable names that are passed to the model as time keyword arguments rather than as
# plain variables, grouped by prefix.
TIME_KWARG_KEYS = {
    "start", "start_ds", "start_date", "start_dt", "start_ts",
    "start_tstz", "start_dtntz", "start_epoch", "start_millis", "start_hour",
    "end", "end_ds", "end_date", "end_dt", "end_ts",
    "end_tstz", "end_dtntz", "end_epoch", "end_millis", "end_hour",
    "execution_time", "execution_ds", "execution_date", "execution_dt", "execution_ts",
    "execution_tstz", "execution_dtntz", "execution_epoch", "execution_millis", "execution_hour",
    "latest", "latest_ds", "latest_date", "latest_dt", "latest_ts",
    "latest_tstz", "latest_dtntz", "latest_epoch", "latest_millis", "latest_hour",
}

class SqlModelTest(ModelTest): View Source

class SqlModelTest(ModelTest):
    """Unit test for a SQL model: renders the model's query and checks its CTEs and output."""

    def test_ctes(self, ctes: t.Dict[str, exp.Expr], recursive: bool = False) -> None:
        """Run CTE queries and compare output to expected output

        Args:
            ctes: Mapping of CTE name to the CTE expression taken from the rendered query.
            recursive: Passed on to `with_` when the CTEs are re-attached to each test query.
        """
        expected_ctes = self.body["outputs"].get("ctes", {})

        for cte_name, values in expected_ctes.items():
            with self.subTest(cte=cte_name):
                if cte_name not in ctes:
                    _raise_error(
                        f"No CTE named {cte_name} found in model {self.model.name}", self.path
                    )

                original_query = ctes[cte_name].this

                # Sorting is requested only when the CTE has no ORDER BY of its own
                sort = original_query.args.get("order") is None
                partial = values.get("partial")

                # Select the CTE's projections from the CTE itself, with every CTE attached
                cte_query = exp.select(*_projection_identifiers(original_query)).from_(cte_name)
                for alias, cte in ctes.items():
                    cte_query = cte_query.with_(alias, cte.this, recursive=recursive)

                with self._concurrent_render_context():
                    # As with the model's query, the SQL is generated under the locked context
                    # so that the execution (fetchdf) can continue concurrently between the threads
                    sql = cte_query.sql(
                        self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql
                    )

                actual = self._execute(sql)
                expected = self._create_df(values, columns=cte_query.named_selects, partial=partial)

                self.assert_equal(expected, actual, sort=sort, partial=partial)

    def runTest(self) -> None:
        # Renders the model's query, checks the CTEs (when the query has a WITH clause)
        # and then compares the query's result against the expected `query` output, if any.
        with self._concurrent_render_context():
            # Rendering and SQL generation both happen under the locked context so that
            # execution (fetchdf) can continue concurrently between the threads
            query = self._render_model_query()
            sql = query.sql(self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql)

        with_clause = query.args.get("with_")
        if with_clause:
            normalized_ctes = {
                self._normalize_model_name(cte.alias, with_default_catalog=False): cte
                for cte in query.ctes
            }
            self.test_ctes(normalized_ctes, recursive=with_clause.recursive)

        values = self.body["outputs"].get("query")
        if values is None:
            return

        partial = values.get("partial")
        # Sorting is requested only when the query has no ORDER BY of its own
        sort = query.args.get("order") is None

        actual = self._execute(sql)
        expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)

        self.assert_equal(expected, actual, sort=sort, partial=partial)

    def _render_model_query(self) -> exp.Query:
        """Render the model's query for the TESTING runtime stage.

        Entries of the test's `vars` whose key is in TIME_KWARG_KEYS are passed as render
        kwargs, the rest as `variables`; each test input is mapped to its fixture table.
        """
        variables = self.body.get("vars", {}).copy()

        time_kwargs = {}
        for key in TIME_KWARG_KEYS:
            if key in variables:
                time_kwargs[key] = variables.pop(key)

        fixture_tables = {
            name: self._test_fixture_table(name).sql() for name in self.body.get("inputs", {})
        }

        return self.model.render_query_or_raise(
            **time_kwargs,
            variables=variables,
            engine_adapter=self.engine_adapter,
            table_mapping=fixture_tables,
            runtime_stage=RuntimeStage.TESTING,
        )

A class whose instances are single test cases.

By default, the test code itself should be placed in a method named 'runTest'.

If the fixture may be used for many test cases, create as many test methods as are needed. When instantiating such a TestCase subclass, specify in the constructor arguments the name of the test method that the instance is to execute.

Test authors should subclass TestCase for their own tests. Construction and deconstruction of the test's environment ('fixture') can be implemented by overriding the 'setUp' and 'tearDown' methods respectively.

If it is necessary to override the __init__ method, the base class __init__ method must always be called. It is important that subclasses should not change the signature of their __init__ method, since instances of the classes are instantiated automatically by parts of the framework in order to be run.

When subclassing TestCase, you can set these attributes:

failureException: determines which exception will be raised when the instance's assertion methods fail; test methods raising this exception will be deemed to have 'failed' rather than 'errored'.
longMessage: determines whether long messages (including repr of objects used in assert methods) will be printed on failure in addition to any explicit message passed.
maxDiff: sets the maximum length of a diff in failure messages by assert methods using difflib. It is looked up as an instance attribute so can be configured by individual tests if required.

def test_ctes( self, ctes: Dict[str, sqlglot.expressions.core.Expr], recursive: bool = False) -> None: View Source

684    def test_ctes(self, ctes: t.Dict[str, exp.Expr], recursive: bool = False) -> None:
685        """Run CTE queries and compare output to expected output"""
686        for cte_name, values in self.body["outputs"].get("ctes", {}).items():
687            with self.subTest(cte=cte_name):
688                if cte_name not in ctes:
689                    _raise_error(
690                        f"No CTE named {cte_name} found in model {self.model.name}", self.path
691                    )
692
693                cte_query = ctes[cte_name].this
694
695                sort = cte_query.args.get("order") is None
696                partial = values.get("partial")
697
698                cte_query = exp.select(*_projection_identifiers(cte_query)).from_(cte_name)
699                for alias, cte in ctes.items():
700                    cte_query = cte_query.with_(alias, cte.this, recursive=recursive)
701
702                with self._concurrent_render_context():
703                    # Similar to the model's query, we render the CTE query under the locked context
704                    # so that the execution (fetchdf) can continue concurrently between the threads
705                    sql = cte_query.sql(
706                        self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql
707                    )
708
709                actual = self._execute(sql)
710                expected = self._create_df(values, columns=cte_query.named_selects, partial=partial)
711
712                self.assert_equal(expected, actual, sort=sort, partial=partial)

Run CTE queries and compare output to expected output

def runTest(self) -> None: View Source

714    def runTest(self) -> None:
715        with self._concurrent_render_context():
716            # Render the model's query and generate the SQL under the locked context so that
717            # execution (fetchdf) can continue concurrently between the threads
718            query = self._render_model_query()
719            sql = query.sql(self._test_adapter_dialect, pretty=self.engine_adapter._pretty_sql)
720
721        with_clause = query.args.get("with_")
722
723        if with_clause:
724            self.test_ctes(
725                {
726                    self._normalize_model_name(cte.alias, with_default_catalog=False): cte
727                    for cte in query.ctes
728                },
729                recursive=with_clause.recursive,
730            )
731
732        values = self.body["outputs"].get("query")
733        if values is not None:
734            partial = values.get("partial")
735            sort = query.args.get("order") is None
736
737            actual = self._execute(sql)
738            expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)
739
740            self.assert_equal(expected, actual, sort=sort, partial=partial)

Inherited Members

ModelTest: ModelTest; CONCURRENT_RENDER_LOCK; body; test_name; model; models; engine_adapter; path; preserve_fixtures; default_catalog; dialect; concurrency; verbosity; defaultTestResult; shortDescription; setUp; tearDown; assert_equal; path_relative_to; create_test
unittest.case.TestCase: failureException; longMessage; maxDiff; addTypeEqualityFunc; addCleanup; addClassCleanup; setUpClass; tearDownClass; countTestCases; id; subTest; run; doCleanups; doClassCleanups; debug; skipTest; fail; assertFalse; assertTrue; assertRaises; assertWarns; assertLogs; assertNoLogs; assertEqual; assertNotEqual; assertAlmostEqual; assertNotAlmostEqual; assertSequenceEqual; assertListEqual; assertTupleEqual; assertSetEqual; assertIn; assertNotIn; assertIs; assertIsNot; assertDictEqual; assertDictContainsSubset; assertCountEqual; assertMultiLineEqual; assertLess; assertLessEqual; assertGreater; assertGreaterEqual; assertIsNone; assertIsNotNone; assertIsInstance; assertNotIsInstance; assertRaisesRegex; assertWarnsRegex; assertRegex; assertNotRegex; failUnlessRaises; failIf; assertRaisesRegexp; assertRegexpMatches; assertNotRegexpMatches; failUnlessEqual; assertEquals; failIfEqual; assertNotEquals; failUnlessAlmostEqual; assertAlmostEquals; failIfAlmostEqual; assertNotAlmostEquals; failUnless; assert_

class PythonModelTest(ModelTest): View Source

class PythonModelTest(ModelTest):
    """Unit test for a Python model: executes the model and checks the DataFrame it returns."""

    def __init__(
        self,
        body: t.Dict[str, t.Any],
        test_name: str,
        model: Model,
        models: UniqueKeyDict[str, Model],
        engine_adapter: EngineAdapter,
        dialect: str | None = None,
        path: Path | None = None,
        preserve_fixtures: bool = False,
        default_catalog: str | None = None,
        concurrency: bool = False,
        verbosity: Verbosity = Verbosity.DEFAULT,
    ) -> None:
        """PythonModelTest encapsulates a unit test for a Python model.

        Args:
            body: A dictionary that contains test metadata like inputs and outputs.
            test_name: The name of the test.
            model: The Python model that is being tested.
            models: All models to use for expansion and mapping of physical locations.
            engine_adapter: The engine adapter to use.
            dialect: The models' dialect, used for normalization purposes.
            path: An optional path to the test definition yaml file.
            preserve_fixtures: Preserve the fixture tables in the testing database, useful for debugging.
            default_catalog: Forwarded to the base class and to the test's execution context.
            concurrency: Forwarded to the base class.
            verbosity: Forwarded to the base class.
        """
        # NOTE(review): imported locally, presumably to avoid a circular import -- confirm
        from sqlmesh.core.test.context import TestExecutionContext

        super().__init__(
            body,
            test_name,
            model,
            models,
            engine_adapter,
            dialect,
            path,
            preserve_fixtures,
            default_catalog,
            concurrency,
            verbosity,
        )

        # The context handed to the model when it is rendered in `_execute_model`
        self.context = TestExecutionContext(
            engine_adapter=engine_adapter,
            models=models,
            test=self,
            default_dialect=dialect,
            default_catalog=default_catalog,
        )

    def runTest(self) -> None:
        # Executes the model and compares its DataFrame with the expected `query` output.
        # Nothing is executed when the test declares no `query` output.
        values = self.body["outputs"].get("query")
        if values is None:
            return

        partial = values.get("partial")

        actual_df = self._execute_model()
        actual_df.reset_index(drop=True, inplace=True)
        expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)

        self.assert_equal(expected, actual_df, sort=True, partial=partial)

    def _execute_model(self) -> pd.DataFrame:
        """Executes the python model and returns a DataFrame.

        Only the first item produced by the model's `render` is used; a result that is
        not a pandas DataFrame is converted through its `toPandas()` method.
        """
        import pandas as pd

        with self._concurrent_render_context():
            variables = self.body.get("vars", {}).copy()

            # Time-related entries are passed as keyword arguments, not as variables
            time_kwargs = {}
            for key in TIME_KWARG_KEYS:
                if key in variables:
                    time_kwargs[key] = variables.pop(key)

            df = next(self.model.render(context=self.context, variables=variables, **time_kwargs))

        assert not isinstance(df, exp.Expr)
        if isinstance(df, pd.DataFrame):
            return df
        return df.toPandas()

A class whose instances are single test cases.

By default, the test code itself should be placed in a method named 'runTest'.

When subclassing TestCase, you can set these attributes:

failureException: determines which exception will be raised when the instance's assertion methods fail; test methods raising this exception will be deemed to have 'failed' rather than 'errored'.
longMessage: determines whether long messages (including repr of objects used in assert methods) will be printed on failure in addition to any explicit message passed.
maxDiff: sets the maximum length of a diff in failure messages by assert methods using difflib. It is looked up as an instance attribute so can be configured by individual tests if required.

759    def __init__(
760        self,
761        body: t.Dict[str, t.Any],
762        test_name: str,
763        model: Model,
764        models: UniqueKeyDict[str, Model],
765        engine_adapter: EngineAdapter,
766        dialect: str | None = None,
767        path: Path | None = None,
768        preserve_fixtures: bool = False,
769        default_catalog: str | None = None,
770        concurrency: bool = False,
771        verbosity: Verbosity = Verbosity.DEFAULT,
772    ) -> None:
773        """PythonModelTest encapsulates a unit test for a Python model.
774
775        Args:
776            body: A dictionary that contains test metadata like inputs and outputs.
777            test_name: The name of the test.
778            model: The Python model that is being tested.
779            models: All models to use for expansion and mapping of physical locations.
780            engine_adapter: The engine adapter to use.
781            dialect: The models' dialect, used for normalization purposes.
782            path: An optional path to the test definition yaml file.
783            preserve_fixtures: Preserve the fixture tables in the testing database, useful for debugging.
784        """
785        from sqlmesh.core.test.context import TestExecutionContext
786
787        super().__init__(
788            body,
789            test_name,
790            model,
791            models,
792            engine_adapter,
793            dialect,
794            path,
795            preserve_fixtures,
796            default_catalog,
797            concurrency,
798            verbosity,
799        )
800
801        self.context = TestExecutionContext(
802            engine_adapter=engine_adapter,
803            models=models,
804            test=self,
805            default_dialect=dialect,
806            default_catalog=default_catalog,
807        )

PythonModelTest encapsulates a unit test for a Python model.

Arguments:

body: A dictionary that contains test metadata like inputs and outputs.
test_name: The name of the test.
model: The Python model that is being tested.
models: All models to use for expansion and mapping of physical locations.
engine_adapter: The engine adapter to use.
dialect: The models' dialect, used for normalization purposes.
path: An optional path to the test definition yaml file.
preserve_fixtures: Preserve the fixture tables in the testing database, useful for debugging.

context

def runTest(self) -> None: View Source

809    def runTest(self) -> None:
810        values = self.body["outputs"].get("query")
811        if values is not None:
812            partial = values.get("partial")
813
814            actual_df = self._execute_model()
815            actual_df.reset_index(drop=True, inplace=True)
816            expected = self._create_df(values, columns=self.model.columns_to_types, partial=partial)
817
818            self.assert_equal(expected, actual_df, sort=True, partial=partial)

Inherited Members

ModelTest: CONCURRENT_RENDER_LOCK; body; test_name; model; models; engine_adapter; path; preserve_fixtures; default_catalog; dialect; concurrency; verbosity; defaultTestResult; shortDescription; setUp; tearDown; assert_equal; path_relative_to; create_test
unittest.case.TestCase: failureException; longMessage; maxDiff; addTypeEqualityFunc; addCleanup; addClassCleanup; setUpClass; tearDownClass; countTestCases; id; subTest; run; doCleanups; doClassCleanups; debug; skipTest; fail; assertFalse; assertTrue; assertRaises; assertWarns; assertLogs; assertNoLogs; assertEqual; assertNotEqual; assertAlmostEqual; assertNotAlmostEqual; assertSequenceEqual; assertListEqual; assertTupleEqual; assertSetEqual; assertIn; assertNotIn; assertIs; assertIsNot; assertDictEqual; assertDictContainsSubset; assertCountEqual; assertMultiLineEqual; assertLess; assertLessEqual; assertGreater; assertGreaterEqual; assertIsNone; assertIsNotNone; assertIsInstance; assertNotIsInstance; assertRaisesRegex; assertWarnsRegex; assertRegex; assertNotRegex; failUnlessRaises; failIf; assertRaisesRegexp; assertRegexpMatches; assertNotRegexpMatches; failUnlessEqual; assertEquals; failIfEqual; assertNotEquals; failUnlessAlmostEqual; assertAlmostEquals; failIfAlmostEqual; assertNotAlmostEquals; failUnless; assert_

def generate_test(
    model: Model,
    input_queries: t.Dict[str, str],
    models: UniqueKeyDict[str, Model],
    engine_adapter: EngineAdapter,
    test_engine_adapter: EngineAdapter,
    project_path: Path,
    overwrite: bool = False,
    variables: t.Optional[t.Dict[str, str]] = None,
    path: t.Optional[str] = None,
    name: t.Optional[str] = None,
    include_ctes: bool = False,
) -> None:
    """Generate a unit test fixture for a given model.

    The input queries are run against ``engine_adapter`` to collect the input rows, the
    model is then executed through a test created on ``test_engine_adapter``, and the
    resulting inputs / outputs are dumped as YAML under the project's test directory.
    Nothing is written if no test could be created for the model.

    Args:
        model: The model to test.
        input_queries: Mapping of model names to queries. Each model included in this mapping
            will be populated in the test based on the results of the corresponding query.
        models: The context's models.
        engine_adapter: The target engine adapter.
        test_engine_adapter: The test engine adapter.
        project_path: The path pointing to the project's root directory.
        overwrite: Whether to overwrite the existing test in case of a file path collision.
            When set to False, an error will be raised if there is such a collision.
        variables: Key-value pairs that will define variables needed by the model.
        path: The file path corresponding to the fixture, relative to the test directory.
            By default, the fixture will be created under the test directory and the file name
            will be inferred from the test's name.
        name: The name of the test. This is inferred from the model name by default.
        include_ctes: When true, CTE fixtures will also be generated.

    Raises:
        ConfigError: If the fixture file already exists and ``overwrite`` is False.
    """
    import numpy as np

    test_name = name or f"test_{model.view_name}"
    path = path or f"{test_name}.yaml"

    extension = path.split(".")[-1].lower()
    if extension not in ("yaml", "yml"):
        path = f"{path}.yaml"

    fixture_path = project_path / c.TESTS / path
    if not overwrite and fixture_path.exists():
        raise ConfigError(
            f"Fixture '{fixture_path}' already exists, make sure to set --overwrite if it can be safely overwritten."
        )

    # ruamel.yaml does not support pandas Timestamps, so we must convert them to python
    # datetime or datetime.date objects based on column type
    inputs = {
        dep: pandas_timestamp_to_pydatetime(
            engine_adapter.fetchdf(query).apply(lambda col: col.map(_normalize_df_value)),
            models[dep].columns_to_types,
        )
        .replace({np.nan: None})
        .to_dict(orient="records")
        for dep, query in input_queries.items()
    }
    # `outputs` is shared by reference with `test_body`, so filling it in below also
    # updates the body that eventually gets dumped to the fixture file.
    outputs: t.Dict[str, t.Any] = {"query": {}}
    variables = variables or {}
    test_body = {"model": model.fqn, "inputs": inputs, "outputs": outputs}

    if variables:
        test_body["vars"] = variables

    test = ModelTest.create_test(
        body=test_body.copy(),
        test_name=test_name,
        models=models,
        engine_adapter=test_engine_adapter,
        dialect=model.dialect,
        path=fixture_path,
        default_catalog=model.default_catalog,
    )
    if not test:
        return

    test.setUp()

    # Everything between setUp and tearDown runs under try/finally so that the test's
    # fixtures are always cleaned up, even when rendering or executing the model fails.
    try:
        if isinstance(model, SqlModel):
            assert isinstance(test, SqlModelTest)
            model_query = test._render_model_query()
            with_clause = model_query.args.get("with_")

            if with_clause and include_ctes:
                ctes = {}
                recursive = with_clause.recursive
                previous_ctes: t.List[exp.CTE] = []

                for cte in model_query.ctes:
                    cte_query = cte.this
                    cte_identifier = cte.args["alias"].this

                    cte_query = exp.select(*_projection_identifiers(cte_query)).from_(
                        cte_identifier
                    )

                    # Re-attach every CTE defined so far (including the current one), since
                    # the current CTE may reference any of the preceding ones.
                    for prev in chain(previous_ctes, [cte]):
                        cte_query = cte_query.with_(
                            prev.args["alias"].this, prev.this, recursive=recursive
                        )

                    cte_output = test._execute(cte_query)
                    ctes[cte.alias] = (
                        pandas_timestamp_to_pydatetime(
                            df=cte_output.apply(lambda col: col.map(_normalize_df_value)),
                        )
                        .replace({np.nan: None})
                        .to_dict(orient="records")
                    )

                    previous_ctes.append(cte)

                if ctes:
                    outputs["ctes"] = ctes

            output = test._execute(model_query)
        else:
            output = t.cast(PythonModelTest, test)._execute_model()

        outputs["query"] = (
            pandas_timestamp_to_pydatetime(
                output.apply(lambda col: col.map(_normalize_df_value)), model.columns_to_types
            )
            .replace({np.nan: None})
            .to_dict(orient="records")
        )
    finally:
        test.tearDown()

    fixture_path.parent.mkdir(exist_ok=True, parents=True)
    with open(fixture_path, "w", encoding="utf-8") as file:
        yaml.dump({test_name: test_body}, file)

Generate a unit test fixture for a given model.

Arguments:

model: The model to test.
input_queries: Mapping of model names to queries. Each model included in this mapping will be populated in the test based on the results of the corresponding query.
models: The context's models.
engine_adapter: The target engine adapter.
test_engine_adapter: The test engine adapter.
project_path: The path pointing to the project's root directory.
overwrite: Whether to overwrite the existing test in case of a file path collision. When set to False, an error will be raised if there is such a collision.
variables: Key-value pairs that will define variables needed by the model.
path: The file path corresponding to the fixture, relative to the test directory. By default, the fixture will be created under the test directory and the file name will be inferred from the test's name.
name: The name of the test. This is inferred from the model name by default.
include_ctes: When true, CTE fixtures will also be generated.