Edit on GitHub

sqlmesh.core.model.definition

View Source

   1from __future__ import annotations
   2
   3import json
   4import logging
   5import types
   6import re
   7import typing as t
   8from functools import cached_property, partial
   9from pathlib import Path
  10
  11from pydantic import Field
  12from sqlglot import diff, exp
  13from sqlglot.diff import Insert
  14from sqlglot.helper import seq_get
  15from sqlglot.optimizer.qualify_columns import quote_identifiers
  16from sqlglot.optimizer.simplify import gen
  17from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
  18from sqlglot.schema import MappingSchema, nested_set
  19from sqlglot.time import format_time
  20
  21from sqlmesh.core import constants as c
  22from sqlmesh.core import dialect as d
  23from sqlmesh.core.audit import Audit, ModelAudit
  24from sqlmesh.core.node import IntervalUnit
  25from sqlmesh.core.macros import MacroRegistry, macro
  26from sqlmesh.core.model.common import (
  27    ParsableSql,
  28    make_python_env,
  29    parse_dependencies,
  30    parse_strings_with_macro_refs,
  31    single_value_or_tuple,
  32    sorted_python_env_payloads,
  33    validate_extra_and_required_fields,
  34)
  35from sqlmesh.core.model.meta import ModelMeta
  36from sqlmesh.core.model.kind import (
  37    ExternalKind,
  38    ModelKindName,
  39    SeedKind,
  40    ModelKind,
  41    FullKind,
  42    create_model_kind,
  43    CustomKind,
  44)
  45from sqlmesh.core.model.seed import CsvSeedReader, Seed, create_seed
  46from sqlmesh.core.renderer import ExpressionRenderer, QueryRenderer
  47from sqlmesh.core.signal import SignalRegistry
  48from sqlmesh.utils import columns_to_types_all_known, str_to_bool, UniqueKeyDict
  49from sqlmesh.utils.cron import CroniterCache
  50from sqlmesh.utils.date import TimeLike, make_inclusive, to_datetime, to_time_column
  51from sqlmesh.utils.errors import ConfigError, SQLMeshError, raise_config_error, PythonModelEvalError
  52from sqlmesh.utils.hashing import hash_data
  53from sqlmesh.utils.jinja import JinjaMacroRegistry, extract_macro_references_and_variables
  54from sqlmesh.utils.pydantic import PydanticModel, PRIVATE_FIELDS
  55from sqlmesh.utils.metaprogramming import (
  56    Executable,
  57    SqlValue,
  58    build_env,
  59    prepare_env,
  60    serialize_env,
  61    format_evaluated_code_exception,
  62)
  63
  64if t.TYPE_CHECKING:
  65    from sqlglot.dialects.dialect import DialectType
  66    from sqlmesh.core.node import _Node
  67    from sqlmesh.core._typing import Self, TableName, SessionProperties
  68    from sqlmesh.core.context import ExecutionContext
  69    from sqlmesh.core.engine_adapter import EngineAdapter
  70    from sqlmesh.core.engine_adapter._typing import QueryOrDF
  71    from sqlmesh.core.engine_adapter.shared import DataObjectType
  72    from sqlmesh.core.linter.rule import Rule
  73    from sqlmesh.core.snapshot import DeployabilityIndex, Node, Snapshot
  74    from sqlmesh.utils.jinja import MacroReference
  75
  76
  77logger = logging.getLogger(__name__)
  78
  79
  80PROPERTIES = {"physical_properties", "session_properties", "virtual_properties"}
  81
  82RUNTIME_RENDERED_MODEL_FIELDS = {
  83    "audits",
  84    "signals",
  85    "merge_filter",
  86} | PROPERTIES
  87
  88CRON_SHORTCUTS = {
  89    "@midnight",
  90    "@hourly",
  91    "@daily",
  92    "@weekly",
  93    "@monthly",
  94    "@yearly",
  95    "@annually",
  96}
  97
  98
  99class _Model(ModelMeta, frozen=True):
 100    """Model is the core abstraction for user defined datasets.
 101
 102    A model consists of logic that fetches the data (a SQL query, a Python script or a seed) and metadata
 103    associated with it. Models can be run on arbitrary cadences and support incremental or full refreshes.
 104    Models can also be materialized into physical tables or shared across other models as temporary views.
 105
 106    Example:
 107        MODEL (
 108            name           sushi.order_items,
 109            owner          jen,
 110            cron           '@daily',
 111            start          '2020-01-01',
 112            partitioned_by ds
 113        );
 114
 115        @DEF(var, 'my_var');
 116
 117        SELECT
 118          1 AS column_a # my first column,
 119          @var AS my_column # my second column,
 120        ;
 121
 122    Args:
 123        name: The name of the model, which is of the form [catalog].[db].table.
 124            The catalog and db are optional.
 125        dialect: The SQL dialect that the model's query is written in. By default,
 126            this is assumed to be the dialect of the context.
 127        owner: The owner of the model.
 128        cron: A cron string specifying how often the model should be refreshed, leveraging the
 129            [croniter](https://github.com/kiorky/croniter) library.
 130        description: The optional model description.
 131        stamp: An optional arbitrary string sequence used to create new model versions without making
 132            changes to any of the functional components of the definition.
 133        start: The earliest date that the model will be backfilled for. If this is None,
 134            then the date is inferred by taking the most recent start date of its ancestors.
 135            The start date can be a static datetime or a relative datetime like "1 year ago"
 136        end: The date that the model will be backfilled up until. Follows the same syntax as 'start',
 137            should be omitted if there is no end date.
 138        lookback: The number of previous incremental intervals in the lookback window.
 139        table_format: The table format used to manage the physical table files defined by `storage_format`, only applicable in certain engines.
 140            (eg, 'iceberg', 'delta', 'hudi')
 141        storage_format: The storage format used to store the physical table, only applicable in certain engines.
 142            (eg. 'parquet', 'orc')
 143        partitioned_by: The partition columns or engine specific expressions, only applicable in certain engines. (eg. (ds, hour))
 144        clustered_by: The cluster columns or engine specific expressions, only applicable in certain engines. (eg. (ds, hour))
 145        python_env: Dictionary containing all global variables needed to render the model's macros.
 146        mapping_schema: The schema of table names to column and types.
 147        extract_dependencies_from_query: Whether to extract additional dependencies from the rendered model's query.
 148        physical_schema_override: The desired physical schema name override.
 149    """
 150
 151    python_env: t.Dict[str, Executable] = {}
 152    jinja_macros: JinjaMacroRegistry = JinjaMacroRegistry()
 153    audit_definitions: t.Dict[str, ModelAudit] = {}
 154    mapping_schema: t.Dict[str, t.Any] = {}
 155    extract_dependencies_from_query: bool = True
 156    pre_statements_: t.Optional[t.List[ParsableSql]] = Field(default=None, alias="pre_statements")
 157    post_statements_: t.Optional[t.List[ParsableSql]] = Field(default=None, alias="post_statements")
 158    on_virtual_update_: t.Optional[t.List[ParsableSql]] = Field(
 159        default=None, alias="on_virtual_update"
 160    )
 161
 162    _full_depends_on: t.Optional[t.Set[str]] = None
 163    _statement_renderer_cache: t.Dict[int, ExpressionRenderer] = {}
 164    _is_metadata_only_change_cache: t.Dict[int, bool] = {}
 165
 166    _expressions_validator = ParsableSql.validator()
 167
 168    def __getstate__(self) -> t.Dict[t.Any, t.Any]:
 169        state = super().__getstate__()
 170        private = state[PRIVATE_FIELDS]
 171        private["_statement_renderer_cache"] = {}
 172        return state
 173
 174    def copy(self, **kwargs: t.Any) -> Self:
 175        model = super().copy(**kwargs)
 176        model._statement_renderer_cache = {}
 177        return model
 178
 179    def render(
 180        self,
 181        *,
 182        context: ExecutionContext,
 183        start: t.Optional[TimeLike] = None,
 184        end: t.Optional[TimeLike] = None,
 185        execution_time: t.Optional[TimeLike] = None,
 186        **kwargs: t.Any,
 187    ) -> t.Iterator[QueryOrDF]:
 188        """Renders the content of this model in a form of either a SELECT query, executing which the data for this model can
 189        be fetched, or a dataframe object which contains the data itself.
 190
 191        The type of the returned object (query or dataframe) depends on whether the model was sourced from a SQL query,
 192        a Python script or a pre-built dataset (seed).
 193
 194        Args:
 195            context: The execution context used for fetching data.
 196            start: The start date/time of the run.
 197            end: The end date/time of the run.
 198            execution_time: The date/time time reference to use for execution time.
 199
 200        Returns:
 201            A generator which yields either a query object or one of the supported dataframe objects.
 202        """
 203        yield self.render_query_or_raise(
 204            start=start,
 205            end=end,
 206            execution_time=execution_time,
 207            snapshots=context.snapshots,
 208            deployability_index=context.deployability_index,
 209            engine_adapter=context.engine_adapter,
 210            **kwargs,
 211        )
 212
 213    def render_definition(
 214        self,
 215        include_python: bool = True,
 216        include_defaults: bool = False,
 217        render_query: bool = False,
 218    ) -> t.List[exp.Expr]:
 219        """Returns the original list of sql expressions comprising the model definition.
 220
 221        Args:
 222            include_python: Whether or not to include Python code in the rendered definition.
 223        """
 224        expressions = []
 225        comment = None
 226        for field_name, field_info in ModelMeta.all_field_infos().items():
 227            field_value = getattr(self, field_name)
 228
 229            if (include_defaults and field_value) or field_value != field_info.default:
 230                if field_name == "description":
 231                    comment = field_value
 232                elif field_name == "kind":
 233                    expressions.append(
 234                        exp.Property(
 235                            this="kind",
 236                            value=field_value.to_expression(dialect=self.dialect),
 237                        )
 238                    )
 239                elif field_name == "name":
 240                    expressions.append(
 241                        exp.Property(
 242                            this=field_name,
 243                            value=exp.to_table(field_value, dialect=self.dialect),
 244                        )
 245                    )
 246                elif field_name not in ("default_catalog", "enabled", "ignored_rules_"):
 247                    expressions.append(
 248                        exp.Property(
 249                            this=field_info.alias or field_name,
 250                            value=META_FIELD_CONVERTER.get(field_name, exp.to_identifier)(
 251                                field_value
 252                            ),
 253                        )
 254                    )
 255
 256        model = d.Model(expressions=expressions)
 257        model.comments = [comment] if comment else None
 258
 259        jinja_expressions = []
 260        python_expressions = []
 261        if include_python:
 262            python_env = d.PythonCode(expressions=sorted_python_env_payloads(self.python_env))
 263            if python_env.expressions:
 264                python_expressions.append(python_env)
 265
 266            jinja_expressions = self.jinja_macros.to_expressions()
 267
 268        return [
 269            model,
 270            *python_expressions,
 271            *jinja_expressions,
 272        ]
 273
 274    def render_query(
 275        self,
 276        *,
 277        start: t.Optional[TimeLike] = None,
 278        end: t.Optional[TimeLike] = None,
 279        execution_time: t.Optional[TimeLike] = None,
 280        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
 281        table_mapping: t.Optional[t.Dict[str, str]] = None,
 282        expand: t.Iterable[str] = tuple(),
 283        deployability_index: t.Optional[DeployabilityIndex] = None,
 284        engine_adapter: t.Optional[EngineAdapter] = None,
 285        **kwargs: t.Any,
 286    ) -> t.Optional[exp.Query]:
 287        """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
 288
 289        Args:
 290            start: The start datetime to render. Defaults to epoch start.
 291            end: The end datetime to render. Defaults to epoch start.
 292            execution_time: The date/time time reference to use for execution time.
 293            snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations.
 294            table_mapping: Table mapping of physical locations. Takes precedence over snapshot mappings.
 295            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
 296                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
 297                end on the fly.
 298            deployability_index: Determines snapshots that are deployable in the context of this render.
 299            kwargs: Additional kwargs to pass to the renderer.
 300
 301        Returns:
 302            The rendered expression.
 303        """
 304        return exp.select(
 305            *(
 306                exp.cast(exp.Null(), column_type, copy=False).as_(name, copy=False, quoted=True)
 307                for name, column_type in (self.columns_to_types or {}).items()
 308            ),
 309            copy=False,
 310        ).from_(exp.values([tuple([1])], alias="t", columns=["dummy"]), copy=False)
 311
 312    def render_query_or_raise(
 313        self,
 314        *,
 315        start: t.Optional[TimeLike] = None,
 316        end: t.Optional[TimeLike] = None,
 317        execution_time: t.Optional[TimeLike] = None,
 318        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
 319        table_mapping: t.Optional[t.Dict[str, str]] = None,
 320        expand: t.Iterable[str] = tuple(),
 321        deployability_index: t.Optional[DeployabilityIndex] = None,
 322        engine_adapter: t.Optional[EngineAdapter] = None,
 323        **kwargs: t.Any,
 324    ) -> exp.Query:
 325        """Same as `render_query()` but raises an exception if the query can't be rendered.
 326
 327        Args:
 328            start: The start datetime to render. Defaults to epoch start.
 329            end: The end datetime to render. Defaults to epoch start.
 330            execution_time: The date/time time reference to use for execution time.
 331            snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
 332            table_mapping: Table mapping of physical locations. Takes precedence over snapshot mappings.
 333            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
 334                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
 335                end on the fly.
 336            deployability_index: Determines snapshots that are deployable in the context of this render.
 337            kwargs: Additional kwargs to pass to the renderer.
 338
 339        Returns:
 340            The rendered expression.
 341        """
 342        query = self.render_query(
 343            start=start,
 344            end=end,
 345            execution_time=execution_time,
 346            snapshots=snapshots,
 347            table_mapping=table_mapping,
 348            expand=expand,
 349            deployability_index=deployability_index,
 350            engine_adapter=engine_adapter,
 351            **kwargs,
 352        )
 353        if query is None:
 354            raise SQLMeshError(f"Failed to render query for model '{self.name}'.")
 355        return query
 356
 357    def render_pre_statements(
 358        self,
 359        *,
 360        start: t.Optional[TimeLike] = None,
 361        end: t.Optional[TimeLike] = None,
 362        execution_time: t.Optional[TimeLike] = None,
 363        snapshots: t.Optional[t.Collection[Snapshot]] = None,
 364        expand: t.Iterable[str] = tuple(),
 365        deployability_index: t.Optional[DeployabilityIndex] = None,
 366        engine_adapter: t.Optional[EngineAdapter] = None,
 367        inside_transaction: t.Optional[bool] = True,
 368        **kwargs: t.Any,
 369    ) -> t.List[exp.Expr]:
 370        """Renders pre-statements for a model.
 371
 372        Pre-statements are statements that preceded the model's SELECT query.
 373
 374        Args:
 375            start: The start datetime to render. Defaults to epoch start.
 376            end: The end datetime to render. Defaults to epoch start.
 377            execution_time: The date/time time reference to use for execution time.
 378            snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
 379            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
 380                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
 381                end on the fly.
 382            deployability_index: Determines snapshots that are deployable in the context of this render.
 383            kwargs: Additional kwargs to pass to the renderer.
 384
 385        Returns:
 386            The list of rendered expressions.
 387        """
 388        return self._render_statements(
 389            [
 390                stmt
 391                for stmt in self.pre_statements
 392                if stmt.args.get("transaction", True) == inside_transaction
 393            ],
 394            start=start,
 395            end=end,
 396            execution_time=execution_time,
 397            snapshots=snapshots,
 398            expand=expand,
 399            deployability_index=deployability_index,
 400            engine_adapter=engine_adapter,
 401            **kwargs,
 402        )
 403
 404    def render_post_statements(
 405        self,
 406        *,
 407        start: t.Optional[TimeLike] = None,
 408        end: t.Optional[TimeLike] = None,
 409        execution_time: t.Optional[TimeLike] = None,
 410        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
 411        expand: t.Iterable[str] = tuple(),
 412        deployability_index: t.Optional[DeployabilityIndex] = None,
 413        engine_adapter: t.Optional[EngineAdapter] = None,
 414        inside_transaction: t.Optional[bool] = True,
 415        **kwargs: t.Any,
 416    ) -> t.List[exp.Expr]:
 417        """Renders post-statements for a model.
 418
 419        Post-statements are statements that follow after the model's SELECT query.
 420
 421        Args:
 422            start: The start datetime to render. Defaults to epoch start.
 423            end: The end datetime to render. Defaults to epoch start.
 424            execution_time: The date/time time reference to use for execution time.
 425            snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
 426            expand: Expand referenced models as subqueries. This is used to bypass backfills when running queries
 427                that depend on materialized tables.  Model definitions are inlined and can thus be run end to
 428                end on the fly.
 429            deployability_index: Determines snapshots that are deployable in the context of this render.
 430            inside_transaction: Whether to render hooks with transaction=True (inside) or transaction=False (outside).
 431            kwargs: Additional kwargs to pass to the renderer.
 432
 433        Returns:
 434            The list of rendered expressions.
 435        """
 436        return self._render_statements(
 437            [
 438                stmt
 439                for stmt in self.post_statements
 440                if stmt.args.get("transaction", True) == inside_transaction
 441            ],
 442            start=start,
 443            end=end,
 444            execution_time=execution_time,
 445            snapshots=snapshots,
 446            expand=expand,
 447            deployability_index=deployability_index,
 448            engine_adapter=engine_adapter,
 449            **kwargs,
 450        )
 451
 452    def render_on_virtual_update(
 453        self,
 454        *,
 455        start: t.Optional[TimeLike] = None,
 456        end: t.Optional[TimeLike] = None,
 457        execution_time: t.Optional[TimeLike] = None,
 458        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
 459        expand: t.Iterable[str] = tuple(),
 460        deployability_index: t.Optional[DeployabilityIndex] = None,
 461        engine_adapter: t.Optional[EngineAdapter] = None,
 462        **kwargs: t.Any,
 463    ) -> t.List[exp.Expr]:
 464        return self._render_statements(
 465            self.on_virtual_update,
 466            start=start,
 467            end=end,
 468            execution_time=execution_time,
 469            snapshots=snapshots,
 470            expand=expand,
 471            deployability_index=deployability_index,
 472            engine_adapter=engine_adapter,
 473            **kwargs,
 474        )
 475
 476    def render_audit_query(
 477        self,
 478        audit: Audit,
 479        *,
 480        start: t.Optional[TimeLike] = None,
 481        end: t.Optional[TimeLike] = None,
 482        execution_time: t.Optional[TimeLike] = None,
 483        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
 484        deployability_index: t.Optional[DeployabilityIndex] = None,
 485        **kwargs: t.Any,
 486    ) -> exp.Query:
 487        from sqlmesh.core.snapshot import DeployabilityIndex
 488
 489        deployability_index = deployability_index or DeployabilityIndex.all_deployable()
 490        snapshot = (snapshots or {}).get(self.fqn)
 491
 492        this_model = kwargs.pop("this_model", None) or (
 493            snapshot.table_name(deployability_index.is_deployable(snapshot))
 494            if snapshot
 495            else self.fqn
 496        )
 497
 498        columns_to_types: t.Optional[t.Dict[str, t.Any]] = None
 499        if "engine_adapter" in kwargs:
 500            try:
 501                columns_to_types = kwargs["engine_adapter"].columns(this_model)
 502            except Exception:
 503                pass
 504
 505        if self.time_column:
 506            low, high = [
 507                self.convert_to_time_column(dt, columns_to_types)
 508                for dt in make_inclusive(start or c.EPOCH, end or c.EPOCH, self.dialect)
 509            ]
 510            where = self.time_column.column.between(low, high)
 511        else:
 512            where = None
 513
 514        # The model's name is already normalized, but in case of snapshots we also prepend a
 515        # case-sensitive physical schema name, so we quote here to ensure that we won't have
 516        # a broken schema reference after the resulting query is normalized in `render`.
 517        quoted_model_name = quote_identifiers(
 518            exp.to_table(this_model, dialect=self.dialect), dialect=self.dialect
 519        )
 520
 521        query_renderer = QueryRenderer(
 522            audit.query,
 523            audit.dialect or self.dialect,
 524            audit.macro_definitions,
 525            path=audit._path or Path(),
 526            jinja_macro_registry=audit.jinja_macros,
 527            python_env=self.python_env,
 528            only_execution_time=self.kind.only_execution_time,
 529            default_catalog=self.default_catalog,
 530        )
 531
 532        rendered_query = query_renderer.render(
 533            start=start,
 534            end=end,
 535            execution_time=execution_time,
 536            snapshots=snapshots,
 537            deployability_index=deployability_index,
 538            **{
 539                **audit.defaults,
 540                "this_model": exp.select("*").from_(quoted_model_name).where(where).subquery()
 541                if where is not None
 542                else quoted_model_name,
 543                **kwargs,
 544            },  # type: ignore
 545        )
 546
 547        if rendered_query is None:
 548            raise SQLMeshError(
 549                f"Failed to render query for audit '{audit.name}', model '{self.name}'."
 550            )
 551
 552        return rendered_query
 553
 554    @property
 555    def pre_statements(self) -> t.List[exp.Expr]:
 556        return self._get_parsed_statements("pre_statements_")
 557
 558    @property
 559    def post_statements(self) -> t.List[exp.Expr]:
 560        return self._get_parsed_statements("post_statements_")
 561
 562    @property
 563    def on_virtual_update(self) -> t.List[exp.Expr]:
 564        return self._get_parsed_statements("on_virtual_update_")
 565
 566    @property
 567    def macro_definitions(self) -> t.List[d.MacroDef]:
 568        """All macro definitions from the list of expressions."""
 569        return [
 570            s
 571            for s in self.pre_statements + self.post_statements + self.on_virtual_update
 572            if isinstance(s, d.MacroDef)
 573        ]
 574
 575    def _get_parsed_statements(self, attr_name: str) -> t.List[exp.Expr]:
 576        value = getattr(self, attr_name)
 577        if not value:
 578            return []
 579        result = []
 580        for v in value:
 581            parsed = v.parse(self.dialect)
 582            if getattr(v, "transaction", None) is not None:
 583                parsed.set("transaction", v.transaction)
 584            if not isinstance(parsed, exp.Semicolon):
 585                result.append(parsed)
 586        return result
 587
 588    def _render_statements(
 589        self,
 590        statements: t.Iterable[exp.Expr],
 591        **kwargs: t.Any,
 592    ) -> t.List[exp.Expr]:
 593        rendered = (
 594            self._statement_renderer(statement).render(**kwargs)
 595            for statement in statements
 596            if not isinstance(statement, d.MacroDef)
 597        )
 598        return [r for expressions in rendered if expressions for r in expressions]
 599
 600    def _statement_renderer(self, expression: exp.Expr) -> ExpressionRenderer:
 601        expression_key = id(expression)
 602        if expression_key not in self._statement_renderer_cache:
 603            self._statement_renderer_cache[expression_key] = ExpressionRenderer(
 604                expression,
 605                self.dialect,
 606                self.macro_definitions,
 607                path=self._path,
 608                jinja_macro_registry=self.jinja_macros,
 609                python_env=self.python_env,
 610                only_execution_time=False,
 611                default_catalog=self.default_catalog,
 612                model=self,
 613            )
 614        return self._statement_renderer_cache[expression_key]
 615
 616    def render_signals(
 617        self,
 618        *,
 619        start: t.Optional[TimeLike] = None,
 620        end: t.Optional[TimeLike] = None,
 621        execution_time: t.Optional[TimeLike] = None,
 622    ) -> t.List[t.Dict[str, str | int | float | bool]]:
 623        """Renders external; signals defined for this model.
 624
 625        Args:
 626            start: The start datetime to render. Defaults to epoch start.
 627            end: The end datetime to render. Defaults to epoch start.
 628            execution_time: The date/time time reference to use for execution time.
 629
 630        Returns:
 631            The list of rendered expressions.
 632        """
 633
 634        def _render(e: exp.Expr) -> str | int | float | bool:
 635            rendered_exprs = (
 636                self._create_renderer(e).render(start=start, end=end, execution_time=execution_time)
 637                or []
 638            )
 639            if len(rendered_exprs) != 1:
 640                raise SQLMeshError(f"Expected one expression but got {len(rendered_exprs)}")
 641
 642            rendered = rendered_exprs[0]
 643            if rendered.is_int:
 644                return int(rendered.this)
 645            if rendered.is_number:
 646                return float(rendered.this)
 647            if isinstance(rendered, (exp.Literal, exp.Boolean)):
 648                return rendered.this
 649            return rendered.sql(dialect=self.dialect)
 650
 651        # airflow only
 652        return [
 653            {k: _render(v) for k, v in signal.items()} for name, signal in self.signals if not name
 654        ]
 655
 656    def render_signal_calls(self) -> EvaluatableSignals:
 657        python_env = self.python_env
 658        env = prepare_env(python_env)
 659        signals_to_kwargs = {
 660            name: {
 661                k: seq_get(self._create_renderer(v).render() or [], 0) for k, v in kwargs.items()
 662            }
 663            for name, kwargs in self.signals
 664            if name
 665        }
 666
 667        return EvaluatableSignals(
 668            signals_to_kwargs=signals_to_kwargs,
 669            python_env=python_env,
 670            prepared_python_env=env,
 671        )
 672
 673    def render_merge_filter(
 674        self,
 675        *,
 676        start: t.Optional[TimeLike] = None,
 677        end: t.Optional[TimeLike] = None,
 678        execution_time: t.Optional[TimeLike] = None,
 679    ) -> t.Optional[exp.Expr]:
 680        if self.merge_filter is None:
 681            return None
 682        rendered_exprs = (
 683            self._create_renderer(self.merge_filter).render(
 684                start=start, end=end, execution_time=execution_time
 685            )
 686            or []
 687        )
 688        if len(rendered_exprs) != 1:
 689            raise SQLMeshError(f"Expected one expression but got {len(rendered_exprs)}")
 690        return rendered_exprs[0].transform(d.replace_merge_table_aliases, dialect=self.dialect)
 691
 692    def _render_properties(
 693        self, properties: t.Dict[str, exp.Expr] | SessionProperties, **render_kwargs: t.Any
 694    ) -> t.Dict[str, t.Any]:
 695        def _render(expression: exp.Expr) -> exp.Expr | None:
 696            # note: we use the _statement_renderer instead of _create_renderer because it sets model_fqn which
 697            # in turn makes @this_model available in the evaluation context
 698            rendered_exprs = self._statement_renderer(expression).render(**render_kwargs)
 699
 700            # Inform instead of raising for cases where a property is conditionally assigned
 701            if not rendered_exprs or rendered_exprs[0].sql().lower() in {"none", "null"}:
 702                logger.info(
 703                    f"Rendering '{expression.sql(dialect=self.dialect)}' did not return an expression"
 704                )
 705                return None
 706
 707            if len(rendered_exprs) != 1:
 708                raise SQLMeshError(
 709                    f"Expected one result when rendering '{expression.sql(dialect=self.dialect)}' but got {len(rendered_exprs)}"
 710                )
 711
 712            return rendered_exprs[0]
 713
 714        return {
 715            k: rendered
 716            for k, v in properties.items()
 717            if (rendered := (_render(v) if isinstance(v, exp.Expr) else v))
 718        }
 719
 720    def render_physical_properties(self, **render_kwargs: t.Any) -> t.Dict[str, t.Any]:
 721        rendered = self._render_properties(properties=self.physical_properties, **render_kwargs)
 722
 723        # Some engines (e.g. StarRocks) accept properties whose values reference other models and
 724        # need the physical table name rather than the logical view SQLMesh exposes. Resolve those.
 725        engine_adapter = render_kwargs.get("engine_adapter")
 726        resolve_keys: t.FrozenSet[str] = getattr(
 727            engine_adapter, "RESOLVE_TABLE_REFS_IN_PHYSICAL_PROPERTIES", frozenset()
 728        )
 729        keys_to_resolve = [key for key in resolve_keys if key in rendered]
 730        if keys_to_resolve:
 731            # Local import: sqlmesh.core.snapshot.definition imports _Model, so importing
 732            # to_table_mapping at module scope would be circular.
 733            from sqlmesh.core.snapshot.definition import to_table_mapping
 734
 735            table_mapping = to_table_mapping(
 736                (render_kwargs.get("snapshots") or {}).values(),
 737                render_kwargs.get("deployability_index"),
 738            )
 739            for key in keys_to_resolve:
 740                rendered[key] = _resolve_model_refs_to_physical_tables(
 741                    rendered[key], table_mapping, self.dialect
 742                )
 743
 744        return rendered
 745
 746    def render_virtual_properties(self, **render_kwargs: t.Any) -> t.Dict[str, t.Any]:
 747        return self._render_properties(properties=self.virtual_properties, **render_kwargs)
 748
 749    def render_session_properties(self, **render_kwargs: t.Any) -> t.Dict[str, t.Any]:
 750        return self._render_properties(properties=self.session_properties, **render_kwargs)
 751
 752    def _create_renderer(self, expression: exp.Expr) -> ExpressionRenderer:
 753        return ExpressionRenderer(
 754            expression,
 755            self.dialect,
 756            [],
 757            path=self._path,
 758            jinja_macro_registry=self.jinja_macros,
 759            python_env=self.python_env,
 760            only_execution_time=False,
 761            quote_identifiers=False,
 762        )
 763
 764    def ctas_query(self, **render_kwarg: t.Any) -> exp.Query:
 765        """Return a dummy query to do a CTAS.
 766
 767        If a model's column types are unknown, the only way to create the table is to
 768        run the fully expanded query. This can be expensive so we add a WHERE FALSE to all
 769        SELECTS and hopefully the optimizer is smart enough to not do anything.
 770
 771        Args:
 772            render_kwarg: Additional kwargs to pass to the renderer.
 773        Return:
 774            The mocked out ctas query.
 775        """
 776        query = self.render_query_or_raise(**render_kwarg).limit(0)
 777
 778        for select_or_set_op in query.find_all(exp.Select, exp.SetOperation):
 779            if isinstance(select_or_set_op, exp.Select) and select_or_set_op.args.get("from_"):
 780                select_or_set_op.where(exp.false(), copy=False)
 781
 782        if self.managed_columns:
 783            query.select(
 784                *[
 785                    exp.alias_(exp.cast(exp.Null(), to=col_type), col)
 786                    for col, col_type in self.managed_columns.items()
 787                    if col not in query.named_selects
 788                ],
 789                append=True,
 790                copy=False,
 791            )
 792        return query
 793
 794    def text_diff(self, other: Node, rendered: bool = False) -> str:
 795        """Produce a text diff against another node.
 796
 797        Args:
 798            other: The node to diff against.
 799            rendered: Whether the diff should compare raw vs rendered models
 800
 801        Returns:
 802            A unified text diff showing additions and deletions.
 803        """
 804        if not isinstance(other, _Model):
 805            raise SQLMeshError(
 806                f"Cannot diff model '{self.name} against a non-model node '{other.name}'"
 807            )
 808
 809        text_diff = d.text_diff(
 810            self.render_definition(render_query=rendered),
 811            other.render_definition(render_query=rendered),
 812            self.dialect,
 813            other.dialect,
 814        ).strip()
 815
 816        if not text_diff and not rendered:
 817            text_diff = d.text_diff(
 818                self.render_definition(render_query=True),
 819                other.render_definition(render_query=True),
 820                self.dialect,
 821                other.dialect,
 822            ).strip()
 823
 824        return text_diff
 825
 826    def set_time_format(self, default_time_format: str = c.DEFAULT_TIME_COLUMN_FORMAT) -> None:
 827        """Sets the default time format for a model.
 828
 829        Args:
 830            default_time_format: A python time format used as the default format when none is provided.
 831        """
 832        if not self.time_column:
 833            return
 834
 835        if self.time_column.format:
 836            # Transpile the time column format into the generic dialect
 837            formatted_time = format_time(
 838                self.time_column.format,
 839                d.Dialect.get_or_raise(self.dialect).TIME_MAPPING,
 840            )
 841            assert formatted_time is not None
 842            self.time_column.format = formatted_time
 843        else:
 844            self.time_column.format = default_time_format
 845
 846    def convert_to_time_column(
 847        self, time: TimeLike, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
 848    ) -> exp.Expr:
 849        """Convert a TimeLike object to the same time format and type as the model's time column."""
 850        if self.time_column:
 851            if columns_to_types is None:
 852                columns_to_types = self.columns_to_types_or_raise
 853
 854            if self.time_column.column.name not in columns_to_types:
 855                raise ConfigError(
 856                    f"Time column '{self.time_column.column.sql(dialect=self.dialect)}' not found in model '{self.name}'."
 857                )
 858
 859            time_column_type = columns_to_types[self.time_column.column.name]
 860
 861            return to_time_column(
 862                time,
 863                time_column_type,
 864                self.dialect,
 865                self.time_column.format,
 866            )
 867        return exp.convert(time)
 868
 869    def set_mapping_schema(self, schema: t.Dict) -> None:
 870        self.mapping_schema.clear()
 871        self.mapping_schema.update(schema)
 872
 873    def update_schema(self, schema: MappingSchema) -> None:
 874        """Updates the schema for this model's dependencies based on the given mapping schema."""
 875        for dep in self.depends_on:
 876            table = exp.to_table(dep)
 877            mapping_schema = schema.find(table)
 878
 879            if mapping_schema:
 880                nested_set(
 881                    self.mapping_schema,
 882                    tuple(part.sql(copy=False) for part in table.parts),
 883                    {col: dtype.sql(dialect=self.dialect) for col, dtype in mapping_schema.items()},
 884                )
 885
 886    @property
 887    def depends_on(self) -> t.Set[str]:
 888        """All of the upstream dependencies referenced in the model's query, excluding self references.
 889
 890        Returns:
 891            A list of all the upstream table names.
 892        """
 893        return self.full_depends_on - {self.fqn}
 894
 895    @property
 896    def columns_to_types(self) -> t.Optional[t.Dict[str, exp.DataType]]:
 897        """Returns the mapping of column names to types of this model."""
 898        if self.columns_to_types_ is None:
 899            return None
 900        return {**self.columns_to_types_, **self.managed_columns}
 901
 902    @property
 903    def columns_to_types_or_raise(self) -> t.Dict[str, exp.DataType]:
 904        """Returns the mapping of column names to types of this model or raise if not available."""
 905        columns_to_types = self.columns_to_types
 906        if columns_to_types is None:
 907            raise SQLMeshError(f"Column information is not available for model '{self.name}'")
 908        return columns_to_types
 909
 910    @property
 911    def annotated(self) -> bool:
 912        """Checks if all column projection types of this model are known."""
 913        if self.columns_to_types is None:
 914            return False
 915        columns_to_types = {
 916            k: v for k, v in self.columns_to_types.items() if k not in self.managed_columns
 917        }
 918        if not columns_to_types:
 919            return False
 920        return columns_to_types_all_known(columns_to_types)
 921
 922    @property
 923    def sorted_python_env(self) -> t.List[t.Tuple[str, Executable]]:
 924        """Returns the python env sorted by executable kind and then var name."""
 925        return sorted(self.python_env.items(), key=lambda x: (x[1].kind, x[0]))
 926
 927    @property
 928    def view_name(self) -> str:
 929        return self.fully_qualified_table.name
 930
 931    @property
 932    def schema_name(self) -> str:
 933        return self.fully_qualified_table.db or c.DEFAULT_SCHEMA
 934
 935    @property
 936    def physical_schema(self) -> str:
 937        return self.physical_schema_override or f"{c.SQLMESH}__{self.schema_name}"
 938
 939    @property
 940    def is_sql(self) -> bool:
 941        return False
 942
 943    @property
 944    def is_python(self) -> bool:
 945        return False
 946
 947    @property
 948    def is_seed(self) -> bool:
 949        return False
 950
 951    @property
 952    def depends_on_self(self) -> bool:
 953        return self.fqn in self.full_depends_on
 954
 955    @property
 956    def forward_only(self) -> bool:
 957        return getattr(self.kind, "forward_only", False)
 958
 959    @property
 960    def disable_restatement(self) -> bool:
 961        return getattr(self.kind, "disable_restatement", False)
 962
 963    @property
 964    def auto_restatement_intervals(self) -> t.Optional[int]:
 965        return getattr(self.kind, "auto_restatement_intervals", None)
 966
 967    @property
 968    def auto_restatement_cron(self) -> t.Optional[str]:
 969        return getattr(self.kind, "auto_restatement_cron", None)
 970
 971    def auto_restatement_croniter(self, value: TimeLike) -> CroniterCache:
 972        cron = self.auto_restatement_cron
 973        if cron is None:
 974            raise SQLMeshError("Auto restatement cron is not set.")
 975        return CroniterCache(cron, value)
 976
 977    @property
 978    def wap_supported(self) -> bool:
 979        return self.kind.is_materialized and (self.storage_format or "").lower() == "iceberg"
 980
    def validate_definition(self) -> None:
        """Validates the model's definition.

        The checks run in this order: uniqueness and presence of the partitioned_by /
        clustered_by columns, time column for incremental-by-time-range kinds,
        partitioning for unmanaged incremental insert/overwrite kinds, Snowflake
        managed table properties, physical version pinning, optimizer usage on
        non-SQL models, custom materialization lookup, and audits on embedded kinds.
        Violations are reported through ``raise_config_error`` together with the model's path.

        Raises:
            ConfigError
        """

        for field in ("partitioned_by", "clustered_by"):
            values = getattr(self, field)

            if values:
                # Collect the name of every column referenced by the field's expressions.
                # For Databricks, clustered_by entries that are bare Vars matching a liquid
                # clustering keyword are skipped and contribute no columns.
                values = [
                    col.name
                    for expr in values
                    if not (
                        field == "clustered_by"
                        and (self.dialect or "").lower() == "databricks"
                        and isinstance(expr, exp.Var)
                        and expr.name.upper() in c.LIQUID_CLUSTERING_KEYWORDS
                    )
                    for col in t.cast(
                        exp.Expr, exp.maybe_parse(expr, dialect=self.dialect)
                    ).find_all(exp.Column)
                ]

                unique_keys = set(values)

                # A size mismatch means at least one column name appears more than once
                if len(values) != len(unique_keys):
                    raise_config_error(
                        f"All keys in '{field}' must be unique in the model definition",
                        self._path,
                    )

                # Presence can only be checked when the model's columns are known
                columns_to_types = self.columns_to_types
                if columns_to_types is not None:
                    missing_keys = unique_keys - set(columns_to_types)
                    if missing_keys:
                        missing_keys_str = ", ".join(f"'{k}'" for k in sorted(missing_keys))
                        raise_config_error(
                            f"{field} keys [{missing_keys_str}] are missing in the model definition",
                            self._path,
                        )

        if self.kind.is_incremental_by_time_range and not self.time_column:
            raise_config_error(
                "Incremental by time range models must have a time_column field",
                self._path,
            )

        # Checks the user-provided partitioned_by_ field rather than the partitioned_by property
        if (
            self.kind.is_incremental_unmanaged
            and getattr(self.kind, "insert_overwrite", False)
            and not self.partitioned_by_
        ):
            raise_config_error(
                "Unmanaged incremental models with insert / overwrite enabled must specify the partitioned_by field",
                self._path,
            )

        if self.kind.is_managed:
            # TODO: would this sort of logic be better off moved into the Kind?
            if self.dialect == "snowflake" and "target_lag" not in self.physical_properties:
                raise_config_error(
                    "Snowflake managed tables must specify the 'target_lag' physical property",
                    self._path,
                )

        if self.physical_version is not None and not self.forward_only:
            raise_config_error(
                "Pinning a physical version is only supported for forward only models",
                self._path,
            )

        # The following attributes should be set only for SQL models
        if not self.is_sql:
            if self.optimize_query:
                raise_config_error(
                    "SQLMesh query optimizer can only be enabled for SQL models",
                    self._path,
                )

        if isinstance(self.kind, CustomKind):
            # Imported locally rather than at module scope
            from sqlmesh.core.snapshot.evaluator import get_custom_materialization_type_or_raise

            # Will raise if the custom materialization points to an invalid class
            get_custom_materialization_type_or_raise(self.kind.materialization)

        # Embedded model kind shouldn't have audits
        if self.kind.name == ModelKindName.EMBEDDED and self.audits:
            raise_config_error(
                "Audits are not supported for embedded models",
                self._path,
            )
1074
1075    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
1076        """Determines whether this model is a breaking change in relation to the `previous` model.
1077
1078        Args:
1079            previous: The previous model to compare against.
1080
1081        Returns:
1082            True if this model instance represents a breaking change, False if it's a non-breaking change
1083            and None if the nature of the change can't be determined.
1084        """
1085        raise NotImplementedError
1086
1087    def is_metadata_only_change(self, other: _Node) -> bool:
1088        if self._is_metadata_only_change_cache.get(id(other), None) is not None:
1089            return self._is_metadata_only_change_cache[id(other)]
1090
1091        is_metadata_change = True
1092        if (
1093            not isinstance(other, _Model)
1094            or self.metadata_hash == other.metadata_hash
1095            or self._data_hash_values_no_sql != other._data_hash_values_no_sql
1096        ):
1097            is_metadata_change = False
1098        else:
1099            this_statements = [
1100                s
1101                for s in [*self.pre_statements, *self.post_statements]
1102                if not self._is_metadata_statement(s)
1103            ]
1104            other_statements = [
1105                s
1106                for s in [*other.pre_statements, *other.post_statements]
1107                if not other._is_metadata_statement(s)
1108            ]
1109            if len(this_statements) != len(other_statements):
1110                is_metadata_change = False
1111            else:
1112                for this_statement, other_statement in zip(this_statements, other_statements):
1113                    this_rendered = (
1114                        self._statement_renderer(this_statement).render() or this_statement
1115                    )
1116                    other_rendered = (
1117                        other._statement_renderer(other_statement).render() or other_statement
1118                    )
1119                    if this_rendered != other_rendered:
1120                        is_metadata_change = False
1121                        break
1122
1123        self._is_metadata_only_change_cache[id(other)] = is_metadata_change
1124        return is_metadata_change
1125
1126    @property
1127    def data_hash(self) -> str:
1128        """
1129        Computes the data hash for the node.
1130
1131        Returns:
1132            The data hash for the node.
1133        """
1134        if self._data_hash is None:
1135            self._data_hash = hash_data(self._data_hash_values)
1136        return self._data_hash
1137
1138    @property
1139    def _data_hash_values(self) -> t.List[str]:
1140        return self._data_hash_values_no_sql + self._data_hash_values_sql
1141
1142    @property
1143    def _data_hash_values_sql(self) -> t.List[str]:
1144        data = []
1145
1146        for statements in [self.pre_statements_, self.post_statements_]:
1147            for statement in statements or []:
1148                data.append(statement.sql)
1149
1150        return data
1151
1152    @property
1153    def _data_hash_values_no_sql(self) -> t.List[str]:
1154        data = [
1155            str(  # Exclude metadata only macro funcs
1156                [(k, v) for k, v in self.sorted_python_env if not v.is_metadata]
1157            ),
1158            *self.kind.data_hash_values,
1159            self.table_format,
1160            self.storage_format,
1161            str(self.lookback),
1162            *(gen(expr) for expr in (self.partitioned_by or [])),
1163            *(gen(expr) for expr in (self.clustered_by or [])),
1164            self.stamp,
1165            self.physical_schema,
1166            self.physical_version,
1167            self.gateway,
1168            self.interval_unit.value if self.interval_unit is not None else None,
1169            str(self.optimize_query) if self.optimize_query is not None else None,
1170            self.virtual_environment_mode.value,
1171        ]
1172
1173        for column_name, column_type in (self.columns_to_types_ or {}).items():
1174            data.append(column_name)
1175            data.append(column_type.sql(dialect=self.dialect))
1176
1177        for key, value in (self.physical_properties or {}).items():
1178            data.append(key)
1179            data.append(gen(value))
1180
1181        return data  # type: ignore
1182
1183    def _audit_metadata_hash_values(self) -> t.List[str]:
1184        from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS
1185
1186        metadata = []
1187
1188        for audit_name, audit_args in sorted(self.audits, key=lambda a: a[0]):
1189            metadata.append(audit_name)
1190            if audit_name not in BUILT_IN_AUDITS:
1191                audit = self.audit_definitions[audit_name]
1192                metadata.extend(
1193                    [
1194                        audit.query_.sql,
1195                        audit.dialect,
1196                        str(audit.skip),
1197                        str(audit.blocking),
1198                    ]
1199                )
1200            for arg_name, arg_value in audit_args.items():
1201                metadata.append(arg_name)
1202                metadata.append(gen(arg_value))
1203
1204        return metadata
1205
1206    def audit_metadata_hash(self) -> str:
1207        return hash_data(self._audit_metadata_hash_values())
1208
1209    @property
1210    def metadata_hash(self) -> str:
1211        """
1212        Computes the metadata hash for the node.
1213
1214        Returns:
1215            The metadata hash for the node.
1216        """
1217        if self._metadata_hash is None:
1218            metadata = [
1219                self.dialect,
1220                self.owner,
1221                self.description,
1222                json.dumps(self.column_descriptions, sort_keys=True),
1223                self.cron,
1224                self.cron_tz.key if self.cron_tz else None,
1225                str(self.start) if self.start else None,
1226                str(self.end) if self.end else None,
1227                str(self.retention) if self.retention else None,
1228                str(self.batch_size) if self.batch_size is not None else None,
1229                str(self.batch_concurrency) if self.batch_concurrency is not None else None,
1230                json.dumps(self.mapping_schema, sort_keys=True),
1231                *sorted(self.tags),
1232                *sorted(ref.json(sort_keys=True) for ref in self.all_references),
1233                *self.kind.metadata_hash_values,
1234                self.project,
1235                str(self.allow_partials),
1236                gen(self.session_properties_) if self.session_properties_ else None,
1237                *[gen(g) for g in self.grains],
1238                *self._audit_metadata_hash_values(),
1239                json.dumps(self.grants, sort_keys=True) if self.grants else None,
1240                self.grants_target_layer,
1241            ]
1242
1243            for key, value in (self.virtual_properties or {}).items():
1244                metadata.append(key)
1245                metadata.append(gen(value))
1246
1247            for signal_name, args in sorted(self.signals, key=lambda x: x[0]):
1248                metadata.append(signal_name)
1249                for k, v in sorted(args.items()):
1250                    metadata.append(f"{k}:{gen(v)}")
1251
1252            if self.dbt_node_info:
1253                metadata.append(self.dbt_node_info.json(sort_keys=True))
1254
1255            metadata.extend(self._additional_metadata)
1256
1257            self._metadata_hash = hash_data(metadata)
1258        return self._metadata_hash
1259
1260    @property
1261    def is_model(self) -> bool:
1262        """Return True if this is a model node"""
1263        return True
1264
1265    @property
1266    def grants_table_type(self) -> DataObjectType:
1267        """Get the table type for grants application (TABLE, VIEW, MATERIALIZED_VIEW).
1268
1269        Returns:
1270            The DataObjectType that should be used when applying grants to this model.
1271        """
1272        from sqlmesh.core.engine_adapter.shared import DataObjectType
1273
1274        if self.kind.is_view:
1275            if hasattr(self.kind, "materialized") and getattr(self.kind, "materialized", False):
1276                return DataObjectType.MATERIALIZED_VIEW
1277            return DataObjectType.VIEW
1278        if self.kind.is_managed:
1279            return DataObjectType.MANAGED_TABLE
1280        # All other materialized models are tables
1281        return DataObjectType.TABLE
1282
1283    @property
1284    def _additional_metadata(self) -> t.List[str]:
1285        additional_metadata = []
1286
1287        metadata_only_macros = [(k, v) for k, v in self.sorted_python_env if v.is_metadata]
1288        if metadata_only_macros:
1289            additional_metadata.append(str(metadata_only_macros))
1290
1291        for statements in [self.pre_statements_, self.post_statements_, self.on_virtual_update_]:
1292            for statement in statements or []:
1293                additional_metadata.append(statement.sql)
1294
1295        return additional_metadata
1296
1297    def _is_metadata_statement(self, statement: exp.Expr) -> bool:
1298        if isinstance(statement, d.MacroDef):
1299            return True
1300        if isinstance(statement, d.MacroFunc):
1301            target_macro = macro.get_registry().get(statement.name)
1302            if target_macro:
1303                return target_macro.metadata_only
1304            target_macro = self.python_env.get(statement.name)
1305            if target_macro:
1306                return bool(target_macro.is_metadata)
1307        return False
1308
1309    @property
1310    def full_depends_on(self) -> t.Set[str]:
1311        if not self.extract_dependencies_from_query:
1312            return self.depends_on_ or set()
1313        if self._full_depends_on is None:
1314            depends_on = self.depends_on_ or set()
1315
1316            query = self.render_query(needs_optimization=False)
1317            if query is not None:
1318                depends_on |= d.find_tables(
1319                    query, default_catalog=self.default_catalog, dialect=self.dialect
1320                )
1321            self._full_depends_on = depends_on
1322
1323        return self._full_depends_on
1324
1325    @property
1326    def partitioned_by(self) -> t.List[exp.Expr]:
1327        """Columns to partition the model by, including the time column if it is not already included."""
1328        if self.time_column and not self._is_time_column_in_partitioned_by:
1329            # This allows the user to opt out of automatic time_column injection
1330            # by setting `partition_by_time_column false` on the model kind
1331            if (
1332                hasattr(self.kind, "partition_by_time_column")
1333                and self.kind.partition_by_time_column
1334            ):
1335                return [
1336                    TIME_COL_PARTITION_FUNC.get(self.dialect, lambda x, y: x)(
1337                        self.time_column.column, self.columns_to_types
1338                    ),
1339                    *self.partitioned_by_,
1340                ]
1341        return self.partitioned_by_
1342
1343    @property
1344    def partition_interval_unit(self) -> t.Optional[IntervalUnit]:
1345        """The interval unit to use for partitioning if applicable."""
1346        # Only return the interval unit for partitioning if the partitioning
1347        # wasn't explicitly set by the user. Otherwise, the user-provided
1348        # value should always take precedence.
1349        if self.time_column and not self._is_time_column_in_partitioned_by:
1350            return self.interval_unit
1351        return None
1352
1353    @property
1354    def audits_with_args(self) -> t.List[t.Tuple[Audit, t.Dict[str, exp.Expr]]]:
1355        from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS
1356
1357        audits_by_name = {**BUILT_IN_AUDITS, **self.audit_definitions}
1358        audits_with_args = []
1359        added_audits = set()
1360
1361        for audit_name, audit_args in self.audits:
1362            audits_with_args.append((audits_by_name[audit_name], audit_args.copy()))
1363            added_audits.add(audit_name)
1364
1365        for audit_name in self.audit_definitions:
1366            if audit_name not in added_audits:
1367                audits_with_args.append((audits_by_name[audit_name], {}))
1368
1369        return audits_with_args
1370
1371    @property
1372    def _is_time_column_in_partitioned_by(self) -> bool:
1373        return self.time_column is not None and self.time_column.column in {
1374            col for expr in self.partitioned_by_ for col in expr.find_all(exp.Column)
1375        }
1376
1377    @property
1378    def violated_rules_for_query(self) -> t.Dict[type[Rule], t.Any]:
1379        return {}
1380
1381
1382class SqlModel(_Model):
1383    """The model definition which relies on a SQL query to fetch the data.
1384
1385    Args:
1386        query: The main query representing the model.
1387        pre_statements: The list of SQL statements that precede the model's query.
1388        post_statements: The list of SQL statements that follow after the model's query.
1389        on_virtual_update: The list of SQL statements to be executed after the virtual update.
1390    """
1391
1392    query_: ParsableSql = Field(alias="query")
1393    source_type: t.Literal["sql"] = "sql"
1394
1395    _columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
1396
1397    def __getstate__(self) -> t.Dict[t.Any, t.Any]:
1398        state = super().__getstate__()
1399        state["__dict__"] = state["__dict__"].copy()
1400        # query renderer is very expensive to serialize
1401        state["__dict__"].pop("_query_renderer", None)
1402        state["__dict__"].pop("column_descriptions", None)
1403        private = state[PRIVATE_FIELDS]
1404        private["_columns_to_types"] = None
1405        return state
1406
1407    def copy(self, **kwargs: t.Any) -> Self:
1408        model = super().copy(**kwargs)
1409        model.__dict__.pop("_query_renderer", None)
1410        model.__dict__.pop("column_descriptions", None)
1411        model._columns_to_types = None
1412        if kwargs.get("update", {}).keys() & {"depends_on_", "query"}:
1413            model._full_depends_on = None
1414        return model
1415
1416    @property
1417    def query(self) -> t.Union[exp.Query, d.JinjaQuery, d.MacroFunc]:
1418        parsed_query = self.query_.parse(self.dialect)
1419        return t.cast(t.Union[exp.Query, d.JinjaQuery, d.MacroFunc], parsed_query)
1420
1421    def render_query(
1422        self,
1423        *,
1424        start: t.Optional[TimeLike] = None,
1425        end: t.Optional[TimeLike] = None,
1426        execution_time: t.Optional[TimeLike] = None,
1427        snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
1428        table_mapping: t.Optional[t.Dict[str, str]] = None,
1429        expand: t.Iterable[str] = tuple(),
1430        deployability_index: t.Optional[DeployabilityIndex] = None,
1431        engine_adapter: t.Optional[EngineAdapter] = None,
1432        **kwargs: t.Any,
1433    ) -> t.Optional[exp.Query]:
1434        query = self._query_renderer.render(
1435            start=start,
1436            end=end,
1437            execution_time=execution_time,
1438            snapshots=snapshots,
1439            table_mapping=table_mapping,
1440            expand=expand,
1441            deployability_index=deployability_index,
1442            engine_adapter=engine_adapter,
1443            **kwargs,
1444        )
1445
1446        return query
1447
1448    def render_definition(
1449        self,
1450        include_python: bool = True,
1451        include_defaults: bool = False,
1452        render_query: bool = False,
1453    ) -> t.List[exp.Expr]:
1454        result: t.List[exp.Expr] = super().render_definition(
1455            include_python=include_python, include_defaults=include_defaults
1456        )
1457
1458        if render_query:
1459            result.extend(self.render_pre_statements())
1460            result.append(self.render_query() or self.query)
1461            result.extend(self.render_post_statements())
1462            if virtual_update := self.render_on_virtual_update():
1463                result.append(d.VirtualUpdateStatement(expressions=virtual_update))
1464        else:
1465            result.extend(self.pre_statements)
1466            result.append(self.query)
1467            result.extend(self.post_statements)
1468            if self.on_virtual_update:
1469                result.append(d.VirtualUpdateStatement(expressions=self.on_virtual_update))
1470
1471        return result
1472
1473    @property
1474    def is_sql(self) -> bool:
1475        return True
1476
1477    @property
1478    def columns_to_types(self) -> t.Optional[t.Dict[str, exp.DataType]]:
1479        if self.columns_to_types_ is not None:
1480            self._columns_to_types = self.columns_to_types_
1481        elif self._columns_to_types is None:
1482            try:
1483                query = self._query_renderer.render()
1484            except Exception:
1485                logger.exception("Failed to render query for model %s", self.fqn)
1486                return None
1487
1488            if query is None:
1489                return None
1490
1491            unknown = exp.DataType.build("unknown")
1492
1493            columns_to_types = {}
1494            for select in query.selects:
1495                output_name = select.output_name
1496
1497                # If model validation is disabled, we cannot assume that projections
1498                # will have inferrable output names or even that they will be unique
1499                if not output_name or output_name in columns_to_types:
1500                    return None
1501
1502                # copy data type because it is used in the engine to build CTAS and other queries
1503                # this can change the parent which will mess up the diffing algo
1504                columns_to_types[output_name] = (select.type or unknown).copy()
1505
1506            self._columns_to_types = columns_to_types
1507
1508        if "*" in self._columns_to_types:
1509            return None
1510
1511        return {**self._columns_to_types, **self.managed_columns}
1512
1513    @cached_property
1514    def column_descriptions(self) -> t.Dict[str, str]:
1515        if self.column_descriptions_ is not None:
1516            return self.column_descriptions_
1517
1518        query = self.render_query()
1519        if query is None:
1520            return {}
1521
1522        return {
1523            select.alias_or_name: select.comments[-1].strip()
1524            for select in query.selects
1525            if select.comments
1526        }
1527
1528    def set_mapping_schema(self, schema: t.Dict) -> None:
1529        super().set_mapping_schema(schema)
1530        self._on_mapping_schema_set()
1531
1532    def update_schema(self, schema: MappingSchema) -> None:
1533        super().update_schema(schema)
1534        self._on_mapping_schema_set()
1535
1536    def _on_mapping_schema_set(self) -> None:
1537        self._columns_to_types = None
1538        self._query_renderer.update_schema(self.mapping_schema)
1539
1540    def validate_definition(self) -> None:
1541        query = self._query_renderer.render()
1542        if query is None:
1543            if self.depends_on_ is None:
1544                raise_config_error(
1545                    "Dependencies must be provided explicitly for models that can be rendered only at runtime",
1546                    self._path,
1547                )
1548            return
1549
1550        if not isinstance(query, exp.Query):
1551            raise_config_error("Missing SELECT query in the model definition", self._path)
1552
1553        projection_list = query.selects
1554        if not projection_list:
1555            raise_config_error("Query missing select statements", self._path)
1556
1557        if self.depends_on_self and not self.annotated:
1558            raise_config_error(
1559                "Self-referencing models require inferrable column types. There are three options available to mitigate this issue: add explicit types to all projections in the outermost SELECT statement, leverage external models (https://sqlmesh.readthedocs.io/en/stable/concepts/models/external_models/), or use the `columns` model attribute (https://sqlmesh.readthedocs.io/en/stable/concepts/models/overview/#columns).",
1560                self._path,
1561            )
1562
1563        super().validate_definition()
1564
    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Classify the change between `previous` and this model by diffing their rendered queries.

        Args:
            previous: The earlier version of the model to compare against.

        Returns:
            False when every edit in the diff is an insertion that is either a
            projection or nested under another inserted expression; None when no
            determination can be made (different model type, different lookback,
            an unrenderable query, or an inserted UDTF outside of a subquery);
            otherwise the result of `_additive_projection_change`.
        """
        if not isinstance(previous, SqlModel):
            return None

        if self.lookback != previous.lookback:
            return None

        try:
            # the previous model which comes from disk could be unrenderable
            previous_query = previous.render_query()
        except Exception:
            previous_query = None
        # Rendering failures of the current model are not suppressed here
        this_query = self.render_query()

        if previous_query is None or this_query is None:
            # Can't determine if there's a breaking change if we can't render the query.
            return None

        if previous_query is this_query:
            # Same object on both sides, so there is nothing to diff
            edits = []
        else:
            edits = diff(
                previous_query,
                this_query,
                matchings=[(previous_query, this_query)],
                delta_only=True,
                # A dialect is only passed when both versions agree on it
                dialect=self.dialect if self.dialect == previous.dialect else None,
            )
        inserted_expressions = {e.expression for e in edits if isinstance(e, Insert)}

        for edit in edits:
            if not isinstance(edit, Insert):
                # Any edit other than an insertion defers to the additive projection check
                return _additive_projection_change(previous_query, this_query, self.dialect)

            expr = edit.expression
            if isinstance(expr, exp.UDTF):
                # projection subqueries do not change cardinality, engines don't allow these to return
                # more than one row of data
                parent = expr.find_ancestor(exp.Subquery)

                if not parent:
                    return None

                # Evaluate the enclosing subquery in place of the UDTF itself
                expr = parent

            # An insertion is acceptable if it is a projection or if its parent was inserted too
            if not _is_projection(expr) and expr.parent not in inserted_expressions:
                return _additive_projection_change(previous_query, this_query, self.dialect)

        return False
1614
1615    def is_metadata_only_change(self, previous: _Node) -> bool:
1616        if self._is_metadata_only_change_cache.get(id(previous), None) is not None:
1617            return self._is_metadata_only_change_cache[id(previous)]
1618
1619        if not super().is_metadata_only_change(previous):
1620            return False
1621
1622        if not isinstance(previous, SqlModel):
1623            self._is_metadata_only_change_cache[id(previous)] = False
1624            return False
1625
1626        this_rendered_query = self.render_query() or self.query
1627        previous_rendered_query = previous.render_query() or previous.query
1628        is_metadata_change = this_rendered_query == previous_rendered_query
1629
1630        self._is_metadata_only_change_cache[id(previous)] = is_metadata_change
1631        return is_metadata_change
1632
1633    @cached_property
1634    def _query_renderer(self) -> QueryRenderer:
1635        no_quote_identifiers = self.kind.is_view and self.dialect in ("trino", "spark")
1636        return QueryRenderer(
1637            self.query,
1638            self.dialect,
1639            self.macro_definitions,
1640            schema=self.mapping_schema,
1641            path=self._path,
1642            jinja_macro_registry=self.jinja_macros,
1643            python_env=self.python_env,
1644            only_execution_time=self.kind.only_execution_time,
1645            default_catalog=self.default_catalog,
1646            quote_identifiers=not no_quote_identifiers,
1647            optimize_query=self.optimize_query,
1648            model=self,
1649        )
1650
1651    @property
1652    def _data_hash_values_no_sql(self) -> t.List[str]:
1653        return [
1654            *super()._data_hash_values_no_sql,
1655            *self.jinja_macros.data_hash_values,
1656        ]
1657
1658    @property
1659    def _data_hash_values_sql(self) -> t.List[str]:
1660        return [
1661            *super()._data_hash_values_sql,
1662            self.query_.sql,
1663        ]
1664
1665    @property
1666    def _additional_metadata(self) -> t.List[str]:
1667        return [*super()._additional_metadata, self.query_.sql]
1668
1669    @property
1670    def violated_rules_for_query(self) -> t.Dict[type[Rule], t.Any]:
1671        self.render_query()
1672        return self._query_renderer._violated_rules
1673
1674
class SeedModel(_Model):
    """The model definition which uses a pre-built static dataset to source the data from.

    Args:
        seed: The content of a pre-built static dataset.
    """

    kind: SeedKind
    seed: Seed
    # Per-column hashes; populated by `to_dehydrated` and cleared by `to_hydrated`.
    column_hashes_: t.Optional[t.Dict[str, str]] = Field(default=None, alias="column_hashes")
    # Column types captured by `to_dehydrated` when no explicit column types are configured.
    derived_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
    # False for dehydrated copies, whose seed content has been replaced with an empty string.
    is_hydrated: bool = True
    source_type: t.Literal["seed"] = "seed"

    def __getstate__(self) -> t.Dict[t.Any, t.Any]:
        """Return the parent's state with the cached `_reader` entry removed."""
        state = super().__getstate__()
        # Work on a copy of the instance dict so that the live instance keeps its reader
        state["__dict__"] = state["__dict__"].copy()
        state["__dict__"].pop("_reader", None)
        return state

    def copy(self, **kwargs: t.Any) -> Self:
        """Copy the model via the parent class and drop any cached `_reader` from the copy."""
        model = super().copy(**kwargs)
        model.__dict__.pop("_reader", None)
        return model

    def render(
        self,
        *,
        context: ExecutionContext,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        execution_time: t.Optional[TimeLike] = None,
        **kwargs: t.Any,
    ) -> t.Iterator[QueryOrDF]:
        """Yield the seed's DataFrames, or nothing at all if the model is not hydrated.

        None of the arguments are used by this implementation.
        """
        if not self.is_hydrated:
            return
        yield from self.render_seed()

    def render_seed(self) -> t.Iterator[QueryOrDF]:
        """Read the seed in batches and yield each batch as a converted DataFrame.

        Columns are converted according to the explicitly configured column types
        (`columns_to_types_`): date/time columns go through `pd.to_datetime`, DATE
        columns are then reduced to `datetime.date`, boolean columns go through
        `str_to_bool`, and non-null values of text columns are cast to `str`.
        NaN values are replaced with None before a batch is yielded.

        Raises:
            SQLMeshError: If the model is not hydrated.
        """
        import numpy as np
        import pandas as pd

        self._ensure_hydrated()

        date_columns = []
        datetime_columns = []
        bool_columns = []
        string_columns = []

        # Only explicitly configured column types drive the conversions below
        columns_to_types = self.columns_to_types_ or {}
        column_names_to_check = set(columns_to_types)
        # Bucket the configured columns by the kind of conversion they need
        for name, tpe in columns_to_types.items():
            if tpe.this in (exp.DataType.Type.DATE, exp.DataType.Type.DATE32):
                date_columns.append(name)
            elif tpe.this in exp.DataType.TEMPORAL_TYPES:
                datetime_columns.append(name)
            elif tpe.is_type("boolean"):
                bool_columns.append(name)
            elif tpe.this in exp.DataType.TEXT_TYPES:
                string_columns.append(name)

        for df in self._reader.read(batch_size=self.kind.batch_size):
            # A configured column that is absent from the frame is looked up under its
            # dialect-normalized name and, if found, renamed to the configured name
            rename_dict = {}
            for column in columns_to_types:
                if column not in df:
                    normalized_name = normalize_identifiers(column, dialect=self.dialect).name
                    if normalized_name in df:
                        rename_dict[normalized_name] = column
            if rename_dict:
                df.rename(columns=rename_dict, inplace=True)
                # These names have already been checked
                column_names_to_check -= set(rename_dict)

            missing_columns = column_names_to_check - set(df.columns)
            if missing_columns:
                raise_config_error(
                    f"Seed model '{self.name}' has missing columns: {missing_columns}", self._path
                )

            # convert all date/time types to native pandas timestamp
            for column in [*date_columns, *datetime_columns]:
                df[column] = pd.to_datetime(df[column], infer_datetime_format=True, errors="ignore")  # type: ignore

            # extract datetime.date from pandas timestamp for DATE columns
            for column in date_columns:
                try:
                    df[column] = df[column].dt.date
                except Exception as ex:
                    # Best effort: the failure is logged and the column is left as it is
                    logger.error(
                        "Failed to convert column '%s' to date in seed model '%s': %s",
                        column,
                        self.name,
                        ex,
                    )

            # NA values become None; anything else is stringified and parsed by str_to_bool
            for column in bool_columns:
                df[column] = df[column].apply(lambda i: None if pd.isna(i) else str_to_bool(str(i)))

            # Replace the non-null values of text columns with their string form
            df.loc[:, string_columns] = df[string_columns].mask(
                cond=lambda x: x.notna(),  # type: ignore
                other=df[string_columns].astype(str),  # type: ignore
            )
            yield df.replace({np.nan: None})

    @property
    def columns_to_types(self) -> t.Optional[t.Dict[str, exp.DataType]]:
        """Column types, resolved in order of precedence.

        Explicitly configured types come first, then the types captured at
        dehydration time, then the types reported by the seed reader (hydrated
        models only). None is returned when none of these is available.
        """
        if self.columns_to_types_ is not None:
            return self.columns_to_types_
        if self.derived_columns_to_types is not None:
            return self.derived_columns_to_types
        if self.is_hydrated:
            return self._reader.columns_to_types
        return None

    @property
    def column_hashes(self) -> t.Dict[str, str]:
        """Stored column hashes if present, otherwise the hashes reported by the seed reader.

        Raises:
            SQLMeshError: If no hashes are stored and the model is not hydrated.
        """
        if self.column_hashes_ is not None:
            return self.column_hashes_
        self._ensure_hydrated()
        return self._reader.column_hashes

    @property
    def is_seed(self) -> bool:
        """Always True for seed models."""
        return True

    @property
    def seed_path(self) -> Path:
        """The path of the seed file.

        A relative path from the model kind is resolved against the directory
        containing the model definition file.

        Raises:
            SQLMeshError: If the path is relative and the model has no path of its own.
        """
        seed_path = Path(self.kind.path)
        if not seed_path.is_absolute():
            if self._path is None:
                raise SQLMeshError(f"Seed model '{self.name}' has no path")
            return self._path.parent / seed_path
        return seed_path

    @property
    def depends_on(self) -> t.Set[str]:
        """The explicitly declared dependencies, excluding the model's own fully qualified name."""
        return (self.depends_on_ or set()) - {self.fqn}

    @property
    def depends_on_self(self) -> bool:
        """Always False for seed models."""
        return False

    @property
    def batch_size(self) -> t.Optional[int]:
        """Always None for seed models (see the comment below)."""
        # Unlike other model kinds, the batch size provided in the SEED kind represents the
        # maximum number of rows to insert in a single batch.
        # We should never batch intervals for seed models.
        return None

    def to_dehydrated(self) -> SeedModel:
        """Creates a dehydrated copy of this model.

        The dehydrated seed model will not contain the seed content, but will contain
        the column hashes. This is useful for comparing two seed models without
        having to read the seed content from disk.

        Returns:
            A dehydrated copy of this model.
        """
        if not self.is_hydrated:
            return self

        return self.copy(
            update={
                "seed": Seed(content=""),
                "is_hydrated": False,
                "column_hashes_": self.column_hashes,
                # Capture the current column types only when none are configured explicitly
                "derived_columns_to_types": self.columns_to_types
                if self.columns_to_types_ is None
                else None,
            }
        )

    def to_hydrated(self, content: str) -> SeedModel:
        """Creates a hydrated copy of this model with the given seed content.

        Args:
            content: The seed content to attach to the copy.

        Returns:
            A hydrated copy of this model.
        """
        if self.is_hydrated:
            return self

        return self.copy(
            update={
                "seed": Seed(content=content),
                "is_hydrated": True,
                # Stored hashes are cleared so that they are read from the seed reader again
                "column_hashes_": None,
            },
        )

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Classify the change relative to `previous` by comparing column hashes.

        Returns:
            False when every column of `previous` is still present with an
            identical hash; None when `previous` is not a seed model, a column
            was removed, or the hash of an existing column changed.
        """
        if not isinstance(previous, SeedModel):
            return None

        new_columns = set(self.column_hashes)
        old_columns = set(previous.column_hashes)

        if not new_columns.issuperset(old_columns):
            return None

        for col in old_columns:
            if self.column_hashes[col] != previous.column_hashes[col]:
                return None

        return False

    def _ensure_hydrated(self) -> None:
        """Raise SQLMeshError if the model is not hydrated."""
        if not self.is_hydrated:
            raise SQLMeshError(f"Seed model '{self.name}' is not hydrated.")

    @cached_property
    def _reader(self) -> CsvSeedReader:
        """The seed reader built from the seed, the model dialect and the kind's CSV settings."""
        return self.seed.reader(dialect=self.dialect, settings=self.kind.csv_settings)

    @property
    def _data_hash_values_no_sql(self) -> t.List[str]:
        """Parent data hash values extended with column names and hashes and the grant settings."""
        data = super()._data_hash_values_no_sql
        for column_name, column_hash in self.column_hashes.items():
            data.append(column_name)
            data.append(column_hash)

        # Include grants in data hash for seed models to force recreation on grant changes
        # since seed models don't support migration
        data.append(json.dumps(self.grants, sort_keys=True) if self.grants else "")
        data.append(self.grants_target_layer)

        return data
1902
1903
class PythonModel(_Model):
    """The model definition which relies on a Python script to fetch the data.

    Args:
        entrypoint: The name of a Python function which contains the data fetching / transformation logic.
    """

    kind: ModelKind = FullKind()
    entrypoint: str
    source_type: t.Literal["python"] = "python"

    def validate_definition(self) -> None:
        """Run the parent validation, then reject kinds that don't support Python models."""
        super().validate_definition()

        kind_is_unsupported = self.kind and not self.kind.supports_python_models
        if kind_is_unsupported:
            raise_config_error(
                f"Cannot create Python model '{self.name}' as the '{self.kind.name}' kind doesn't support Python models",
                self._path,
            )

    def render(
        self,
        *,
        context: ExecutionContext,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        execution_time: t.Optional[TimeLike] = None,
        **kwargs: t.Any,
    ) -> t.Iterator[QueryOrDF]:
        """Call the model's entrypoint and yield what it produces.

        Args:
            context: The execution context; the entrypoint receives a version of it
                carrying the resolved variables and blueprint variables.
            start: The start of the interval; the epoch constant is used when falsy.
            end: The end of the interval; the epoch constant is used when falsy.
            execution_time: The execution time; the epoch constant is used when falsy.
            kwargs: Extra keyword arguments for the entrypoint. A ``variables`` entry
                is removed and merged on top of the environment's variables.

        Yields:
            Every item of the entrypoint's result if it is a generator, otherwise
            the result itself as a single item.

        Raises:
            PythonModelEvalError: If anything raised inside the guarded section fails.
        """
        env = prepare_env(self.python_env)
        start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH, self.dialect)
        execution_time = to_datetime(execution_time or c.EPOCH)

        # Later sources win: environment variables, then metadata variables, then the caller's
        variables = {}
        variables.update(env.get(c.SQLMESH_VARS, {}))
        variables.update(env.get(c.SQLMESH_VARS_METADATA, {}))
        variables.update(kwargs.pop("variables", {}))

        raw_blueprint_variables = {
            **env.get(c.SQLMESH_BLUEPRINT_VARS, {}),
            **env.get(c.SQLMESH_BLUEPRINT_VARS_METADATA, {}),
        }
        blueprint_variables = {}
        for var_name, var_value in raw_blueprint_variables.items():
            # SQL values are stored as text and parsed with the model's dialect
            if isinstance(var_value, SqlValue):
                var_value = d.parse_one(var_value.sql, dialect=self.dialect)
            blueprint_variables[var_name] = var_value

        try:
            call_kwargs = {
                **variables,
                **kwargs,
                "start": start,
                "end": end,
                "execution_time": execution_time,
                "latest": execution_time,  # TODO: Preserved for backward compatibility. Remove in 1.0.0.
            }
            entrypoint = env[self.entrypoint]
            result = entrypoint(
                context=context.with_variables(variables, blueprint_variables=blueprint_variables),
                **call_kwargs,
            )

            batches = result if isinstance(result, types.GeneratorType) else [result]
            for batch in batches:
                yield batch
        except Exception as e:
            raise PythonModelEvalError(format_evaluated_code_exception(e, self.python_env))

    def render_definition(
        self,
        include_python: bool = True,
        include_defaults: bool = False,
        render_query: bool = False,
    ) -> t.List[exp.Expr]:
        """Render the definition through the parent class, always including the Python code."""
        # The include_python argument is deliberately ignored: a Python model's
        # definition without its Python code is meaningless.
        return super().render_definition(
            include_python=True,
            include_defaults=include_defaults,
            render_query=render_query,
        )

    @property
    def is_python(self) -> bool:
        """Always True for Python models."""
        return True

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Always None: no determination is made for Python models."""
        return None

    @property
    def _data_hash_values_no_sql(self) -> t.List[str]:
        """Parent data hash values with the entrypoint name appended."""
        hash_values = super()._data_hash_values_no_sql
        hash_values.append(self.entrypoint)
        return hash_values
1995
1996
class ExternalModel(_Model):
    """The model definition which represents an external source/table."""

    kind: ModelKind = ExternalKind()
    source_type: t.Literal["external"] = "external"

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Classify the change relative to `previous` by comparing column types.

        Returns:
            False when every (column, type) pair of `previous` is still present
            in this model; None otherwise, including when `previous` is not an
            external model.
        """
        if not isinstance(previous, ExternalModel):
            return None

        previous_columns = previous.columns_to_types_or_raise.items()
        removed_or_changed = previous_columns - self.columns_to_types_or_raise.items()
        if removed_or_changed:
            return None
        return False

    @property
    def depends_on(self) -> t.Set[str]:
        """Always an empty set for external models."""
        return set()

    @property
    def depends_on_self(self) -> bool:
        """Always False for external models."""
        return False
2017
2018
# Type alias covering the concrete model types; used in annotations throughout this module.
Model = t.Union[SqlModel, SeedModel, PythonModel, ExternalModel]
2020
2021
class AuditResult(PydanticModel):
    """Holds an audit together with its arguments and the data recorded for its result."""

    audit: Audit
    """The audit this result is for."""
    audit_args: t.Dict[t.Any, t.Any]
    """Arguments passed to the audit."""
    model: t.Optional[_Model] = None
    """The model this audit is for."""
    count: t.Optional[int] = None
    """The number of records returned by the audit query. This could be None if the audit was skipped."""
    query: t.Optional[exp.Expr] = None
    """The rendered query used by the audit. This could be None if the audit was skipped."""
    skipped: bool = False
    """Whether or not the audit was skipped."""
    blocking: bool = True
    """Whether or not the audit was blocking. This can be overridden by the user."""
2036
2037
class EvaluatableSignals(PydanticModel):
    """Bundles signal call arguments with the Python environments used to evaluate them."""

    signals_to_kwargs: t.Dict[str, t.Dict[str, t.Optional[exp.Expr]]]
    """A mapping of signal names to the kwargs passed to the signal."""
    python_env: t.Dict[str, Executable]
    """The Python environment that should be used to evaluate the rendered signal calls."""
    prepared_python_env: t.Dict[str, t.Any]
    """The prepared Python environment that should be used to evaluate the rendered signal calls."""
2045
2046
2047def _extract_blueprints(blueprints: t.Any, path: Path) -> t.List[t.Any]:
2048    if not blueprints:
2049        return [None]
2050    if isinstance(blueprints, exp.Paren):
2051        return [blueprints.unnest()]
2052    if isinstance(blueprints, (exp.Tuple, exp.Array)):
2053        return blueprints.expressions
2054    if isinstance(blueprints, list):
2055        return blueprints
2056
2057    raise_config_error(
2058        "Expected a list or tuple consisting of key-value mappings for "
2059        f"the 'blueprints' property, got '{blueprints}' instead",
2060        path,
2061    )
2062    return []  # This is unreachable, but is done to satisfy mypy
2063
2064
2065def _extract_blueprint_variables(blueprint: t.Any, path: Path) -> t.Dict[str, t.Any]:
2066    if not blueprint:
2067        return {}
2068    if isinstance(blueprint, (exp.Paren, exp.PropertyEQ)):
2069        blueprint = blueprint.unnest()
2070        return {blueprint.left.name.lower(): blueprint.right}
2071    if isinstance(blueprint, (exp.Tuple, exp.Array)):
2072        return {e.left.name.lower(): e.right for e in blueprint.expressions}
2073    if isinstance(blueprint, dict):
2074        return {k.lower(): v for k, v in blueprint.items()}
2075
2076    raise_config_error(
2077        f"Expected a key-value mapping for the blueprint value, got '{blueprint}' instead",
2078        path,
2079    )
2080    return {}  # This is unreachable, but is done to satisfy mypy
2081
2082
def create_models_from_blueprints(
    gateway: t.Optional[str | exp.Expr],
    blueprints: t.Any,
    get_variables: t.Callable[[t.Optional[str]], t.Dict[str, str]],
    loader: t.Callable[..., Model],
    path: Path = Path(),
    module_path: Path = Path(),
    dialect: DialectType = None,
    default_catalog_per_gateway: t.Optional[t.Dict[str, str]] = None,
    **loader_kwargs: t.Any,
) -> t.List[Model]:
    """Create one model per blueprint by calling `loader` with that blueprint's variables.

    Args:
        gateway: The gateway as a string or expression; rendered per blueprint
            so that it may depend on blueprint variables.
        blueprints: The raw `blueprints` value, normalized by `_extract_blueprints`.
        get_variables: Returns the variables for a given gateway name (or None).
        loader: The callable that builds a single model.
        path: The path of the model definition file.
        module_path: Forwarded to the gateway rendering and to the loader.
        dialect: The dialect used to parse and render the gateway.
        default_catalog_per_gateway: Optional mapping of gateway name to default catalog.
        loader_kwargs: Extra keyword arguments for the loader; `default_catalog`
            is reset and possibly overridden for every blueprint.

    Returns:
        The models, in blueprint order.
    """
    models: t.List[Model] = []
    base_default_catalog = loader_kwargs.get("default_catalog")

    for blueprint in _extract_blueprints(blueprints, path):
        # Start from the caller's catalog: an earlier blueprint may have replaced it
        loader_kwargs["default_catalog"] = base_default_catalog
        blueprint_variables = _extract_blueprint_variables(blueprint, path)

        gateway_name = None
        if gateway:
            rendered_gateway = render_expression(
                expression=exp.maybe_parse(gateway, dialect=dialect),
                module_path=module_path,
                macros=loader_kwargs.get("macros"),
                jinja_macros=loader_kwargs.get("jinja_macros"),
                path=path,
                dialect=dialect,
                default_catalog=loader_kwargs.get("default_catalog"),
                blueprint_variables=blueprint_variables,
            )
            if rendered_gateway:
                gateway_name = rendered_gateway[0].name

        if default_catalog_per_gateway and gateway_name:
            # A gateway without an entry in the dict (e.g., catalog-unsupported engines
            # like ClickHouse) yields None here, which clears the default catalog so the
            # global default from the primary gateway doesn't leak into this model's name.
            loader_kwargs["default_catalog"] = default_catalog_per_gateway.get(gateway_name)

        model = loader(
            path=path,
            module_path=module_path,
            dialect=dialect,
            variables=get_variables(gateway_name),
            blueprint_variables=blueprint_variables,
            **loader_kwargs,
        )
        models.append(model)

    return models
2137
2138
def load_sql_based_models(
    expressions: t.List[exp.Expr],
    get_variables: t.Callable[[t.Optional[str]], t.Dict[str, str]],
    path: Path = Path(),
    module_path: Path = Path(),
    dialect: DialectType = None,
    default_catalog_per_gateway: t.Optional[t.Dict[str, str]] = None,
    **loader_kwargs: t.Any,
) -> t.List[Model]:
    """Load every model described by a parsed SQL model file, one per blueprint.

    The `gateway` and `blueprints` properties are read from the leading MODEL
    statement. The `blueprints` property is removed from that statement, and a
    macro-generated value is rendered first.

    Args:
        expressions: The parsed expressions of the model file.
        get_variables: Returns the variables for a given gateway name (or None).
        path: The path of the model definition file.
        module_path: Forwarded to rendering and to the loader.
        dialect: The dialect used for rendering.
        default_catalog_per_gateway: Optional mapping of gateway name to default catalog.
        loader_kwargs: Extra keyword arguments forwarded to `load_sql_based_model`.

    Returns:
        The loaded models.
    """
    gateway: t.Optional[exp.Expr] = None
    blueprints: t.Optional[exp.Expr] = None

    model_meta = seq_get(expressions, 0)
    # Iterate over a snapshot of the properties: `prop.pop()` below removes the property
    # from the MODEL statement, and removing from a list while iterating over it would
    # skip the property that follows `blueprints` (e.g. a `gateway` right after it).
    properties = list((isinstance(model_meta, d.Model) and model_meta.expressions) or [])
    for prop in properties:
        if prop.name == "gateway":
            gateway = prop.args["value"]
        elif prop.name == "blueprints":
            # We pop the `blueprints` here to avoid walking large lists when rendering the meta
            blueprints = prop.pop().args["value"]

    if isinstance(blueprints, d.MacroFunc):
        rendered_blueprints = render_expression(
            expression=blueprints,
            module_path=module_path,
            macros=loader_kwargs.get("macros"),
            jinja_macros=loader_kwargs.get("jinja_macros"),
            variables=get_variables(None),
            path=path,
            dialect=dialect,
            default_catalog=loader_kwargs.get("default_catalog"),
        )
        if not rendered_blueprints:
            raise_config_error("Failed to render blueprints property", path)

        # Help mypy see that rendered_blueprints can't be None
        assert rendered_blueprints

        # Several rendered expressions are treated as one tuple of blueprints
        if len(rendered_blueprints) > 1:
            rendered_blueprints = [exp.Tuple(expressions=rendered_blueprints)]

        blueprints = rendered_blueprints[0]

    return create_models_from_blueprints(
        gateway=gateway,
        blueprints=blueprints,
        get_variables=get_variables,
        loader=partial(load_sql_based_model, expressions),
        path=path,
        module_path=module_path,
        dialect=dialect,
        default_catalog_per_gateway=default_catalog_per_gateway,
        **loader_kwargs,
    )
2192
2193
2194def load_sql_based_model(
2195    expressions: t.List[exp.Expr],
2196    *,
2197    defaults: t.Optional[t.Dict[str, t.Any]] = None,
2198    path: t.Optional[Path] = None,
2199    module_path: Path = Path(),
2200    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
2201    macros: t.Optional[MacroRegistry] = None,
2202    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
2203    audits: t.Optional[t.Dict[str, ModelAudit]] = None,
2204    python_env: t.Optional[t.Dict[str, Executable]] = None,
2205    dialect: t.Optional[str] = None,
2206    physical_schema_mapping: t.Optional[t.Dict[re.Pattern, str]] = None,
2207    default_catalog: t.Optional[str] = None,
2208    variables: t.Optional[t.Dict[str, t.Any]] = None,
2209    infer_names: t.Optional[bool] = False,
2210    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
2211    **kwargs: t.Any,
2212) -> Model:
2213    """Load a model from a parsed SQLMesh model SQL file.
2214
2215    Args:
2216        expressions: Model, *Statements, Query.
2217        defaults: Definition default values.
2218        path: An optional path to the model definition file.
2219        module_path: The python module path to serialize macros for.
2220        time_column_format: The default time column format to use if no model time column is configured.
2221        macros: The custom registry of macros. If not provided the default registry will be used.
2222        jinja_macros: The registry of Jinja macros.
2223        python_env: The custom Python environment for macros. If not provided the environment will be constructed
2224            from the macro registry.
2225        dialect: The default dialect if no model dialect is configured.
2226            The format must adhere to Python's strftime codes.
2227        physical_schema_mapping: A mapping of regular expressions to match against the model schema to produce the corresponding physical schema
2228        default_catalog: The default catalog if no model catalog is configured.
2229        variables: The variables to pass to the model.
2230        kwargs: Additional kwargs to pass to the loader.
2231    """
2232    missing_model_msg = f"""Please add a MODEL block at the top of the file. Example:
2233
2234MODEL (
2235  name sqlmesh_example.full_model, --model name
2236  kind FULL, --materialization
2237  cron '@daily', --schedule
2238);
2239
2240Learn more at https://sqlmesh.readthedocs.io/en/stable/concepts/models/overview
2241"""
2242
2243    if not expressions:
2244        raise_config_error(missing_model_msg)
2245
2246    dialect = dialect or ""
2247    meta = expressions[0]
2248    if not isinstance(meta, d.Model):
2249        if not infer_names:
2250            raise_config_error(missing_model_msg)
2251        meta = d.Model(expressions=[])  # Dummy meta node
2252        expressions.insert(0, meta)
2253
2254    # We deliberately hold off rendering some properties at load time because there is not enough information available
2255    # at load time to render them. They will get rendered later at evaluation time
2256    unrendered_properties = {}
2257    unrendered_merge_filter = None
2258
2259    for prop in meta.expressions:
2260        # Macro functions that programmaticaly generate the key-value pair properties should be rendered
2261        # This is needed in the odd case where a macro shares the name of one of the properties
2262        # eg `@session_properties()` Test: `test_macros_in_model_statement` Reference PR: #2574
2263        if isinstance(prop, d.MacroFunc):
2264            continue
2265
2266        prop_name = prop.name.lower()
2267        if prop_name in {"signals", "audits"} | PROPERTIES:
2268            unrendered_properties[prop_name] = prop.args.get("value")
2269        elif (
2270            prop.name.lower() == "kind"
2271            and (value := prop.args.get("value"))
2272            and value.name.lower() == "incremental_by_unique_key"
2273        ):
2274            for kind_prop in value.expressions:
2275                if kind_prop.name.lower() == "merge_filter":
2276                    unrendered_merge_filter = kind_prop
2277
2278    rendered_meta_exprs = render_expression(
2279        expression=meta,
2280        module_path=module_path,
2281        macros=macros,
2282        jinja_macros=jinja_macros,
2283        variables=variables,
2284        path=path,
2285        dialect=dialect,
2286        default_catalog=default_catalog,
2287        blueprint_variables=blueprint_variables,
2288    )
2289
2290    if rendered_meta_exprs is None or len(rendered_meta_exprs) != 1:
2291        raise_config_error(
2292            f"Invalid MODEL statement:\n{meta.sql(dialect=dialect, pretty=True)}",
2293            path,
2294        )
2295        raise
2296
2297    rendered_meta = rendered_meta_exprs[0]
2298
2299    rendered_defaults = (
2300        render_model_defaults(
2301            defaults=defaults,
2302            module_path=module_path,
2303            macros=macros,
2304            jinja_macros=jinja_macros,
2305            variables=variables,
2306            path=path,
2307            dialect=dialect,
2308            default_catalog=default_catalog,
2309        )
2310        if defaults
2311        else {}
2312    )
2313
2314    rendered_defaults = parse_defaults_properties(rendered_defaults, dialect=dialect)
2315
2316    # Extract the query and any pre/post statements
2317    query_or_seed_insert, pre_statements, post_statements, on_virtual_update, inline_audits = (
2318        _split_sql_model_statements(expressions[1:], path, dialect=dialect)
2319    )
2320
2321    meta_fields: t.Dict[str, t.Any] = {
2322        "dialect": dialect,
2323        "description": (
2324            "\n".join(comment.strip() for comment in rendered_meta.comments)
2325            if rendered_meta.comments
2326            else None
2327        ),
2328        **{prop.name.lower(): prop.args.get("value") for prop in rendered_meta.expressions},
2329        **kwargs,
2330    }
2331
2332    # Discard the potentially half-rendered versions of these properties and replace them with the
2333    # original unrendered versions. They will get rendered properly at evaluation time
2334    meta_fields.update(unrendered_properties)
2335
2336    if unrendered_merge_filter:
2337        for idx, kind_prop in enumerate(meta_fields["kind"].expressions):
2338            if kind_prop.name.lower() == "merge_filter":
2339                meta_fields["kind"].expressions[idx] = unrendered_merge_filter
2340
2341    if isinstance(meta_fields.get("dialect"), exp.Expr):
2342        meta_fields["dialect"] = meta_fields["dialect"].name
2343
2344    # The name of the model will be inferred from its path relative to `models/`, if it's not explicitly specified
2345    name = meta_fields.pop("name", "")
2346    if not name and infer_names:
2347        if path is None:
2348            raise ValueError(f"Model {name} must have a name")
2349        name = get_model_name(path)
2350
2351    if not name:
2352        raise_config_error(
2353            "Please add the required 'name' field to the MODEL block at the top of the file.\n\n"
2354            + "Learn more at https://sqlmesh.readthedocs.io/en/stable/concepts/models/overview"
2355        )
2356    if "default_catalog" in meta_fields:
2357        raise_config_error(
2358            "`default_catalog` cannot be set on a per-model basis. It must be set at the connection level.",
2359            path,
2360        )
2361
2362    common_kwargs = dict(
2363        pre_statements=pre_statements,
2364        post_statements=post_statements,
2365        on_virtual_update=on_virtual_update,
2366        defaults=rendered_defaults,
2367        path=path,
2368        module_path=module_path,
2369        macros=macros,
2370        python_env=python_env,
2371        jinja_macros=jinja_macros,
2372        physical_schema_mapping=physical_schema_mapping,
2373        default_catalog=default_catalog,
2374        variables=variables,
2375        inline_audits=inline_audits,
2376        blueprint_variables=blueprint_variables,
2377        use_original_sql=True,
2378        **meta_fields,
2379    )
2380
2381    kind = common_kwargs.pop("kind", ModelMeta.all_field_infos()["kind"].default)
2382
2383    if kind.name != ModelKindName.SEED:
2384        return create_sql_model(
2385            name,
2386            query_or_seed_insert,
2387            kind=kind,
2388            time_column_format=time_column_format,
2389            **common_kwargs,
2390        )
2391
2392    seed_properties = {p.name.lower(): p.args.get("value") for p in kind.expressions}
2393    return create_seed_model(
2394        name,
2395        SeedKind(**seed_properties),
2396        **common_kwargs,
2397    )
2398
2399
def create_sql_model(
    name: TableName,
    query: t.Optional[exp.Expr],
    **kwargs: t.Any,
) -> Model:
    """Build a SQL model from its name and query.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        query: The model's logic. It must be a query expression, a Jinja query block
            or a macro function call.
        kwargs: Additional keyword arguments forwarded to the model factory. The optional
            `path` entry is passed along when an invalid query is reported.

    Returns:
        The model created by `_create_model` for the `SqlModel` class.
    """
    supported_query_types = (exp.Query, d.JinjaQuery, d.MacroFunc)

    if not isinstance(query, supported_query_types):
        raise_config_error(
            "A query is required and must be a SELECT statement, a UNION statement, or a JINJA_QUERY block",
            kwargs.get("path"),
        )
        # Repeats the check above; presumably kept so that type checkers narrow `query`.
        assert isinstance(query, supported_query_types)

    return _create_model(SqlModel, name, query=query, **kwargs)
2420
2421
def create_seed_model(
    name: TableName,
    seed_kind: SeedKind,
    *,
    path: t.Optional[Path] = None,
    module_path: Path = Path(),
    **kwargs: t.Any,
) -> Model:
    """Build a Seed model from a seed kind that points at the seed file.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        seed_kind: The information about the location of a seed and other related configuration.
            Its `path` is rewritten in place when it starts with the `$root` marker.
        path: An optional path to the model definition file. Relative seed paths are
            resolved against it (or against its parent when it is not a directory).
        module_path: The base path substituted for a leading `$root` marker in the seed path.
        kwargs: Additional keyword arguments forwarded to the model factory.

    Returns:
        The model created by `_create_model` for the `SeedModel` class.
    """
    seed_path = Path(seed_kind.path)
    marker, *subdirs = seed_path.parts

    if marker.lower() == "$root":
        # `$root/...` is anchored at the module path and persisted on the kind itself.
        seed_path = module_path.joinpath(*subdirs)
        seed_kind.path = str(seed_path)
    elif path is not None and not seed_path.is_absolute():
        # Relative seed paths are anchored next to the model definition when its path is known.
        base_dir = path if path.is_dir() else path.parent
        seed_path = base_dir / seed_path

    seed = create_seed(seed_path)
    # `depends_on` is taken out of kwargs so it is passed explicitly exactly once.
    depends_on = kwargs.pop("depends_on", None)

    return _create_model(
        SeedModel,
        name,
        path=path,
        seed=seed,
        kind=seed_kind,
        depends_on=depends_on,
        module_path=module_path,
        **kwargs,
    )
2464
2465
def create_python_model(
    name: str,
    entrypoint: str,
    python_env: t.Dict[str, Executable],
    *,
    macros: t.Optional[MacroRegistry] = None,
    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
    path: Path = Path(),
    module_path: Path = Path(),
    depends_on: t.Optional[t.Set[str]] = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    **kwargs: t.Any,
) -> Model:
    """Creates a Python model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        entrypoint: The name of a Python function which contains the data fetching / transformation logic.
        python_env: The Python environment of all objects referenced by the model implementation.
            The variables referenced by the model are added to it in place.
        macros: The custom registry of macros used when rendering explicit dependencies.
        jinja_macros: The registry of Jinja macros used when rendering explicit dependencies.
        path: An optional path to the model definition file.
        module_path: The python module path to serialize macros for.
        depends_on: The custom set of model's upstream dependencies. When omitted, the
            dependencies are inferred by parsing the model's code.
        variables: The variables to pass to the model.
        blueprint_variables: The blueprint's variables to pass to the model.
        kwargs: Additional keyword arguments forwarded to the model factory. The `dialect` and
            `default_catalog` entries are also used when rendering explicit dependencies.

    Returns:
        The model created by `_create_model` for the `PythonModel` class.
    """
    dialect = kwargs.get("dialect")

    # Dependencies are only inferred from the code when they are not explicitly defined.
    dependencies_unspecified = depends_on is None

    parsed_depends_on, referenced_variables = (
        parse_dependencies(
            python_env,
            entrypoint,
            strict_resolution=dependencies_unspecified,
            variables=variables,
            blueprint_variables=blueprint_variables,
        )
        if python_env is not None
        else (set(), set())
    )
    if dependencies_unspecified:
        # Remove self-references that were found while parsing the code.
        depends_on = parsed_depends_on - {name}
    else:
        # Explicit dependencies may contain macros, so they are rendered as a single array.
        # Blueprint variables are forwarded as well, so that dependencies referencing them
        # render the same way as the rest of the model.
        depends_on_rendered = render_expression(
            expression=exp.Array(
                expressions=[exp.maybe_parse(dep, dialect=dialect) for dep in depends_on or []]
            ),
            module_path=module_path,
            macros=macros,
            jinja_macros=jinja_macros,
            variables=variables,
            path=path,
            dialect=dialect,
            default_catalog=kwargs.get("default_catalog"),
            blueprint_variables=blueprint_variables,
        )
        depends_on = {
            dep.sql(dialect=dialect)
            for dep in t.cast(t.List[exp.Expr], depends_on_rendered)[0].expressions
        }

    # Only the variables that the model's code actually references are stored in its environment.
    used_variables = {k: v for k, v in (variables or {}).items() if k in referenced_variables}
    if used_variables:
        python_env[c.SQLMESH_VARS] = Executable.value(used_variables, sort_root_dict=True)

    return _create_model(
        PythonModel,
        name,
        path=path,
        depends_on=depends_on,
        entrypoint=entrypoint,
        python_env=python_env,
        macros=macros,
        jinja_macros=jinja_macros,
        module_path=module_path,
        variables=variables,
        blueprint_variables=blueprint_variables,
        **kwargs,
    )
2548
2549
def create_external_model(
    name: TableName,
    *,
    dialect: t.Optional[str] = None,
    path: Path = Path(),
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    **kwargs: t.Any,
) -> ExternalModel:
    """Build an external model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        dialect: The dialect to serialize.
        path: An optional path to the model definition file.
        defaults: Definition default values forwarded to the model factory.
        kwargs: Additional keyword arguments forwarded to the model factory.

    Returns:
        The model created by `_create_model` for the `ExternalModel` class, with its kind
        set to the external kind.
    """
    external_model = _create_model(
        ExternalModel,
        name,
        defaults=defaults,
        dialect=dialect,
        path=path,
        kind=ModelKindName.EXTERNAL.value,
        **kwargs,
    )
    # The factory is annotated with the generic model type, hence the cast.
    return t.cast(ExternalModel, external_model)
2578
2579
def _create_model(
    klass: t.Type[_Model],
    name: TableName,
    *,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: t.Optional[Path] = None,
    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
    jinja_macro_references: t.Optional[t.Set[MacroReference]] = None,
    depends_on: t.Optional[t.Set[str]] = None,
    dialect: t.Optional[str] = None,
    physical_schema_mapping: t.Optional[t.Dict[re.Pattern, str]] = None,
    python_env: t.Optional[t.Dict[str, Executable]] = None,
    audit_definitions: t.Optional[t.Dict[str, ModelAudit]] = None,
    inline_audits: t.Optional[t.Dict[str, ModelAudit]] = None,
    module_path: Path = Path(),
    macros: t.Optional[MacroRegistry] = None,
    signal_definitions: t.Optional[SignalRegistry] = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    use_original_sql: bool = False,
    **kwargs: t.Any,
) -> Model:
    """Instantiate a model of the given class and attach its audits, signals and Python environment.

    Args:
        klass: The model class to instantiate.
        name: The name of the model.
        defaults: Definition default values. Only the entries that are fields of `klass`
            are applied to the model.
        path: An optional path to the model definition file. It is used as the location of
            reported errors and stored on the model.
        time_column_format: The time format applied to the model through `set_time_format`.
        jinja_macros: The registry of Jinja macros. It is trimmed to the referenced macros
            unless it has already been trimmed.
        jinja_macro_references: Not read by this function; the references are recomputed from
            the model's statements.
        depends_on: The explicit set of upstream dependencies, if any.
        dialect: The model's dialect. An empty string is used when not provided.
        physical_schema_mapping: Regular expressions matched against the model's schema name to
            produce the physical schema override. The first match wins.
        python_env: A Python environment passed on to `make_python_env`.
        audit_definitions: The audit definitions available to the model, by name.
        inline_audits: The audits defined alongside the model. They replace entries of
            `audit_definitions` that share their name.
        module_path: The python module path used when building and serializing the environment.
        macros: The registry of macros. The default registry is used when not provided.
        signal_definitions: The registry of signals the model is allowed to reference.
        variables: The variables to pass to the model.
        blueprint_variables: The blueprint's variables to pass to the model.
        use_original_sql: Forwarded as `use_meta_sql` when expressions are wrapped into `ParsableSql`.
        kwargs: The remaining model fields.

    Returns:
        The created model.
    """
    provided_fields = {"name", *kwargs} - {"grain", "table_properties"}
    validate_extra_and_required_fields(klass, provided_fields, "MODEL block", path)

    # Combine the default and the model-level values of every key-value property.
    for property_name in PROPERTIES:
        default_value = (defaults or {}).get(property_name)
        kwargs[property_name] = _resolve_properties(default_value, kwargs.get(property_name))

    dialect = dialect or ""

    # The first pattern that matches the model's schema name decides the physical schema override.
    model_schema_name = exp.to_table(name, dialect=dialect).db
    physical_schema_override: t.Optional[str] = next(
        (
            override_schema
            for schema_pattern, override_schema in (physical_schema_mapping or {}).items()
            if re.match(schema_pattern, model_schema_name)
        ),
        None,
    )

    kind_definition = kwargs.pop("kind", None)
    if kind_definition:
        kwargs["kind"] = create_model_kind(kind_definition, dialect, defaults or {})

    # From here on only the defaults that are fields of the model class are considered.
    defaults = {
        field: value for field, value in (defaults or {}).items() if field in klass.all_fields()
    }
    if not issubclass(klass, SqlModel):
        defaults.pop("optimize_query", None)

    # Expressions that are scanned for macro and variable references. Entries are either a bare
    # expression or an `(expression, is_metadata)` pair.
    statements: t.List[t.Union[exp.Expr, t.Tuple[exp.Expr, bool]]] = []

    if "query" in kwargs:
        query_expression = kwargs["query"]
        statements.append(query_expression)
        kwargs["query"] = ParsableSql.from_parsed_expression(
            query_expression, dialect, use_meta_sql=use_original_sql
        )

    # Default statements come first, followed by the model-specific ones.
    for statement_field in ("pre_statements", "post_statements", "on_virtual_update"):
        if statement_field in defaults:
            default_statements = [
                exp.maybe_parse(stmt, dialect=dialect) for stmt in defaults[statement_field]
            ]
            kwargs[statement_field] = default_statements + kwargs.get(statement_field, [])

        if statement_field not in kwargs:
            continue

        # Macros extracted from the virtual update statements are treated as metadata only.
        is_metadata = statement_field == "on_virtual_update"
        field_statements = kwargs[statement_field]
        for stmt in field_statements:
            # Statements that are already ParsableSql are parsed back into expressions.
            parsed_stmt = stmt.parse(dialect) if isinstance(stmt, ParsableSql) else stmt
            statements.append((parsed_stmt, is_metadata))

        # Existing ParsableSql instances are kept as they are to retain the transaction information.
        kwargs[statement_field] = [
            stmt
            if isinstance(stmt, ParsableSql)
            else ParsableSql.from_parsed_expression(stmt, dialect, use_meta_sql=use_original_sql)
            for stmt in field_statements
        ]

    # Properties are rendered at run time rather than load time, so variables like @gateway
    # used in them must be discovered here. `_resolve_properties` produces a Tuple.
    for property_name in PROPERTIES:
        property_values = kwargs.get(property_name)
        if isinstance(property_values, exp.Tuple):
            statements.extend(property_values.expressions)

    merge_filter = getattr(kwargs.get("kind"), "merge_filter", None)
    if isinstance(merge_filter, exp.Expr):
        statements.append(merge_filter)

    jinja_macro_references, referenced_variables = extract_macro_references_and_variables(
        *(gen(entry if isinstance(entry, exp.Expr) else entry[0]) for entry in statements)
    )

    if not jinja_macros:
        jinja_macros = JinjaMacroRegistry()
    elif not jinja_macros.trimmed:
        jinja_macros = jinja_macros.trim(jinja_macro_references)

    # Variables referenced inside the remaining Jinja macro definitions count as referenced too.
    for jinja_macro in jinja_macros.root_macros.values():
        referenced_variables.update(
            extract_macro_references_and_variables(jinja_macro.definition)[1]
        )

    # Default audits are placed before the model-specific audits.
    default_audits = defaults.pop("audits", None)
    if default_audits:
        kwargs["audits"] = default_audits + d.extract_function_calls(kwargs.pop("audits", []))

    # Explicit model fields take precedence over the computed values, which in turn take
    # precedence over the defaults.
    model_fields = {
        **defaults,
        "jinja_macros": jinja_macros or JinjaMacroRegistry(),
        "dialect": dialect,
        "depends_on": depends_on,
        "physical_schema_override": physical_schema_override,
        **kwargs,
    }
    model = klass(name=name, **model_fields)

    known_audits = {
        **(audit_definitions or {}),
        **(inline_audits or {}),
    }

    used_audits: t.Set[str] = {audit_name for audit_name, _ in model.audits}

    # Only the definitions of audits that the model actually uses are attached to it.
    referenced_audit_definitions = {
        audit_name: known_audits[audit_name]
        for audit_name in used_audits
        if audit_name in known_audits
    }

    model.audit_definitions.update(referenced_audit_definitions)

    # Any macro referenced in audits or signals needs to be treated as metadata-only
    statements.extend((audit.query, True) for audit in referenced_audit_definitions.values())  # type: ignore[misc]

    # Ensure that all audits referenced in the model are defined
    from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS

    available_audits = BUILT_IN_AUDITS.keys() | model.audit_definitions.keys()
    for referenced_audit, audit_args in model.audits:
        if referenced_audit not in available_audits:
            raise_config_error(f"Audit '{referenced_audit}' is undefined", location=path)

        statements.extend(
            (audit_arg_expression, True) for audit_arg_expression in audit_args.values()
        )

    signal_definitions = signal_definitions or UniqueKeyDict("signals")

    for referenced_signal, signal_kwargs in model.signals:
        if referenced_signal and referenced_signal not in signal_definitions:
            raise_config_error(f"Signal '{referenced_signal}' is undefined", location=path)

        statements.extend((signal_kwarg, True) for signal_kwarg in signal_kwargs.values())

    model_python_env = make_python_env(
        statements,
        jinja_macro_references,
        module_path,
        macros or macro.get_registry(),
        variables=variables,
        referenced_variables=referenced_variables,
        path=path,
        python_env=python_env,
        strict_resolution=depends_on is None,
        blueprint_variables=blueprint_variables,
        dialect=dialect,
    )

    # The functions of the referenced signals are flagged as metadata and serialized separately.
    signal_env: t.Dict[str, t.Tuple[t.Any, t.Optional[bool]]] = {}

    for signal_name, _ in model.signals:
        if signal_name and signal_name in signal_definitions:
            func = signal_definitions[signal_name].func
            setattr(func, c.SQLMESH_METADATA, True)
            build_env(func, env=signal_env, name=signal_name, path=module_path)

    model.python_env.update(model_python_env)
    model.python_env.update(serialize_env(signal_env, path=module_path))
    model._path = path
    model.set_time_format(time_column_format)

    return t.cast(Model, model)
2772
2773
# Parsed form of the `@INSERT_SEED()` macro call. `_split_sql_model_statements` compares each
# statement against it to recognise the seed insert among a model definition's statements.
INSERT_SEED_MACRO_CALL = d.parse_one("@INSERT_SEED()")
2775
2776
def _split_sql_model_statements(
    expressions: t.List[exp.Expr],
    path: t.Optional[Path],
    dialect: t.Optional[str] = None,
) -> t.Tuple[
    t.Optional[exp.Expr],
    t.List[exp.Expr],
    t.List[exp.Expr],
    t.List[exp.Expr],
    UniqueKeyDict[str, ModelAudit],
]:
    """Extracts the SELECT query from a sequence of expressions.

    Args:
        expressions: The list of all SQL statements in the model definition.
        path: An optional path to the model definition file, used as the location of reported errors.
        dialect: The dialect used when loading inline audits.

    Returns:
        A tuple containing the extracted SELECT query or the `@INSERT_SEED()` call (None when the
        definition contains neither), the statements before it, the statements after it,
        the virtual update statements, and the inline audit definitions.

    Raises:
        ConfigError: If the model definition contains more than one SELECT query or `@INSERT_SEED()` call,
            or if an audit definition is not followed by its query.
    """
    from sqlmesh.core.audit import ModelAudit, load_audit

    # Each entry pairs a query candidate with its position within `sql_statements`.
    query_positions: t.List[t.Tuple[exp.Expr, int]] = []
    sql_statements: t.List[exp.Expr] = []
    on_virtual_update: t.List[exp.Expr] = []
    inline_audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("inline_audits")

    idx = 0
    length = len(expressions)
    while idx < length:
        expr = expressions[idx]

        if isinstance(expr, d.Audit):
            # An audit definition spans two expressions: the audit block and the query after it.
            if idx + 1 >= length:
                raise_config_error("An AUDIT definition must be followed by its query", path)
            loaded_audit = load_audit([expr, expressions[idx + 1]], dialect=dialect)
            assert isinstance(loaded_audit, ModelAudit)
            inline_audits[loaded_audit.name] = loaded_audit
            idx += 2
        elif isinstance(expr, d.VirtualUpdateStatement):
            on_virtual_update.extend(expr.expressions)
            idx += 1
        else:
            if (
                isinstance(expr, (exp.Query, d.JinjaQuery))
                or expr == INSERT_SEED_MACRO_CALL
                or (
                    isinstance(expr, d.MacroFunc)
                    and (expr.this.name.lower() == "union" or length == 1)
                )
            ):
                # The position must refer to `sql_statements` rather than `expressions`, because
                # audits and virtual update blocks are not part of `sql_statements`. Using the
                # index into `expressions` would shift the split whenever such a block precedes
                # the query.
                query_positions.append((expr, len(sql_statements)))
            sql_statements.append(expr)
            idx += 1

    if not query_positions:
        return None, sql_statements, [], on_virtual_update, inline_audits

    if len(query_positions) > 1:
        raise_config_error("Only one SELECT query is allowed per model", path)

    query, pos = query_positions[0]
    return query, sql_statements[:pos], sql_statements[pos + 1 :], on_virtual_update, inline_audits
2842
2843
def _resolve_model_refs_to_physical_tables(
    value: exp.Expr, table_mapping: t.Dict[str, str], dialect: DialectType
) -> exp.Literal:
    """Swap managed-model references in a property value for their physical table names.

    A string literal is treated as a comma-separated list of table references; any other
    expression is treated as a single reference given by its SQL. References found in
    ``table_mapping`` (looked up by their quoted table name) are replaced with the mapped
    physical table, all others are kept. Blank entries are skipped.

    Args:
        value: The property value holding one or more table references.
        table_mapping: Maps quoted model table names to physical table names.
        dialect: The dialect used to parse and generate the references.

    Returns:
        A single string literal with the resolved, unquoted names joined by commas.
    """
    if isinstance(value, exp.Literal) and value.is_string:
        raw_refs = value.this.split(",")
    else:
        raw_refs = [value.sql(dialect=dialect)]

    resolved_names: t.List[str] = []
    for raw_ref in raw_refs:
        ref = raw_ref.strip()
        if not ref:
            continue

        table = exp.to_table(ref, dialect=dialect)
        physical = table_mapping.get(exp.table_name(table, identify=True))
        # Managed model -> physical table; otherwise the reference itself is kept.
        target = exp.to_table(physical, dialect=dialect) if physical else table
        resolved_names.append(exp.table_name(target, identify=False))

    return exp.Literal.string(",".join(resolved_names))
2868
2869
def _resolve_properties(
    default: t.Optional[t.Dict[str, t.Any]],
    provided: t.Optional[exp.Expr | t.Dict[str, t.Any]],
) -> t.Optional[exp.Expr]:
    """Merge default and model-provided key-value properties into one tuple expression.

    Args:
        default: The default property values keyed by property name.
        provided: The properties set on the model, either as a dictionary or as an expression
            whose items are the property assignments.

    Returns:
        A tuple of the resulting property assignments, or None when no property remains.
    """
    resolved: t.Dict[str, t.Any]
    if isinstance(provided, dict):
        resolved = {key: exp.Literal.string(key).eq(value) for key, value in provided.items()}
    elif provided:
        # A parenthesized single property is treated as a one-element tuple.
        if isinstance(provided, exp.Paren):
            provided = exp.Tuple(expressions=[provided.this])
        resolved = {assignment.this.name: assignment for assignment in provided}
    else:
        resolved = {}

    for key, default_value in (default or {}).items():
        if key in resolved:
            # A provided value of none / null for a defaulted property drops it altogether.
            if resolved[key].expression.sql().lower() in {"none", "null"}:
                del resolved[key]
        else:
            resolved[key] = exp.Literal.string(key).eq(default_value)

    return exp.Tuple(expressions=list(resolved.values())) if resolved else None
2893
2894
def _list_of_calls_to_exp(value: t.List[t.Tuple[str, t.Dict[str, t.Any]]]) -> exp.Expr:
    """Convert `(name, keyword arguments)` pairs into a tuple of anonymous function calls.

    Every keyword argument becomes an equality between the converted key and the converted value.
    """
    calls = []
    for call in value:
        arguments = [
            exp.EQ(this=exp.convert(arg_name), expression=exp.convert(arg_value))
            for arg_name, arg_value in call[1].items()
        ]
        calls.append(exp.Anonymous(this=call[0], expressions=arguments))

    return exp.Tuple(expressions=calls)
2908
2909
def _is_projection(expr: exp.Expr) -> bool:
    """Tell whether the expression sits directly in the `expressions` of a SELECT."""
    return isinstance(expr.parent, exp.Select) and expr.arg_key == "expressions"
2913
2914
def _has_ordinal_references(query: exp.Select) -> bool:
    """Tell whether the query's ORDER BY or GROUP BY contains a numeric literal.

    For ORDER BY the `this` of each entry is inspected, for GROUP BY the entries themselves.
    """

    def is_numeric_literal(node: t.Any) -> bool:
        return isinstance(node, exp.Literal) and node.is_number

    order = query.args.get("order")
    if order and any(is_numeric_literal(entry.this) for entry in order.expressions):
        return True

    group = query.args.get("group")
    if group and any(is_numeric_literal(entry) for entry in group.expressions):
        return True

    return False
2925
2926
def _additive_projection_change(
    previous_query: exp.Query,
    this_query: exp.Query,
    dialect: DialectType,
) -> t.Optional[bool]:
    """Fallback for when SQLGlot's tree diff can't express an additive projection change.

    SQLGlot's diff matches nodes by structural similarity, so interchangeable leaves (e.g. two
    identical ``CAST(... AS T)`` target types) can be cross-matched. Inserting a same-type cast
    above an existing one therefore yields spurious ``Move`` / ``Update`` edits even though a
    column was simply added to the SELECT list. In that case an edit-based check is
    inconclusive, so additivity is verified directly against the output projections.

    Returns ``False`` (non-breaking) only when the change is provably additive:
      * both queries are simple ``SELECT`` statements,
      * the new query has more projections than the previous one,
      * everything other than the projection list is structurally identical,
      * no projection of the new query is a (potentially cardinality-changing) ``UDTF``,
      * every previous projection is preserved, in order, within the new projection list, and
      * no mid-list insert shifts ordinal ``ORDER BY`` / ``GROUP BY`` references.

    Otherwise returns ``None`` (undetermined), preserving the conservative default.

    Args:
        previous_query: The query before the change.
        this_query: The query after the change.
        dialect: The dialect used to generate the SQL that projections are compared by.
    """
    # UNIONs or other query expressions, are left to the caller's conservative diff result.
    if not isinstance(previous_query, exp.Select) or not isinstance(this_query, exp.Select):
        return None

    previous_projections = previous_query.expressions
    this_projections = this_query.expressions
    # If the new query has not gained any projections, this cannot be an additive projection-only
    # change, so there is nothing for this fallback to prove.
    if len(this_projections) <= len(previous_projections):
        return None

    # Adding a UDTF projection (e.g. EXPLODE / UNNEST) can change row cardinality, so such a
    # change is not safely non-breaking even when it appears as an extra SELECT item.
    for projection in this_projections:
        bare = projection.this if isinstance(projection, exp.Alias) else projection
        if isinstance(bare, exp.UDTF):
            return None

    # Everything other than the projection list must be structurally identical. Replacing each
    # SELECT list with the same dummy literal lets the expression equality check focus on the
    # FROM / WHERE / GROUP BY / ORDER BY / etc. parts of the query.
    previous_skeleton = previous_query.copy()
    this_skeleton = this_query.copy()
    previous_skeleton.set("expressions", [exp.Literal.number(1)])
    this_skeleton.set("expressions", [exp.Literal.number(1)])
    if previous_skeleton != this_skeleton:
        return None

    # Every previous projection must appear, in order, within the new projection list. Comparing
    # the generated SQL makes equivalent projection nodes match even though they are distinct
    # objects.
    this_projection_sql = [p.sql(dialect=dialect, comments=False) for p in this_projections]
    search_start = 0
    matched_at: list[int] = []
    for projection in previous_projections:
        target_sql = projection.sql(dialect=dialect, comments=False)
        # Continue after the previous match so added columns can appear before, between, or after
        # the original projections, but existing projections cannot be reordered or rewritten.
        try:
            index = this_projection_sql.index(target_sql, search_start)
        except ValueError:
            return None
        matched_at.append(index)
        search_start = index + 1

    # Mid-list inserts shift ordinal references in ORDER BY / GROUP BY clauses.
    if _has_ordinal_references(this_query):
        # Without any preserved projection there is nothing to anchor the ordinals to, so the
        # result stays undetermined (this also avoids indexing into an empty match list).
        if not matched_at:
            return None
        matched_set = set(matched_at)
        # An added projection located before the last preserved one shifts the positions.
        if any(index not in matched_set for index in range(matched_at[-1])):
            return None

    # At this point the query shape is unchanged and all prior outputs are preserved, so the only
    # remaining difference is one or more additional, non-UDTF projections.
    return False
3005
3006
def _single_expr_or_tuple(values: t.Sequence[exp.Expr]) -> exp.Expr | exp.Tuple:
    """Return the only value as is, or wrap the values in a tuple expression otherwise."""
    if len(values) == 1:
        return values[0]
    return exp.Tuple(expressions=values)
3009
3010
def _refs_to_sql(values: t.Any) -> exp.Expr:
    """Wrap the given values in a tuple expression."""
    refs_tuple = exp.Tuple(expressions=values)
    return refs_tuple
3013
3014
def render_meta_fields(
    fields: t.Dict[str, t.Any],
    module_path: Path,
    path: t.Optional[Path],
    jinja_macros: t.Optional[JinjaMacroRegistry],
    macros: t.Optional[MacroRegistry],
    dialect: DialectType,
    variables: t.Optional[t.Dict[str, t.Any]],
    default_catalog: t.Optional[str],
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Dict[str, t.Any]:
    """Renders macro references found in the values of model meta fields.

    The provided `fields` dictionary is modified in place and also returned. Fields whose
    rendered value is None (or whose rendered dict / list ends up empty) are removed from it.

    Args:
        fields: The meta field values, keyed by each field's alias (or its name if it has no alias).
        module_path: Forwarded to `render_expression`.
        path: Forwarded to `render_expression`.
        jinja_macros: Forwarded to `render_expression`.
        macros: Forwarded to `render_expression`.
        dialect: The dialect used to parse values and to generate SQL for rendered column types.
        variables: Forwarded to `render_expression`.
        default_catalog: Forwarded to `render_expression`.
        blueprint_variables: Forwarded to `render_expression`.

    Returns:
        The same `fields` dictionary that was passed in.

    Raises:
        SQLMeshError: If rendering a value yields no expression or more than one expression.
    """

    def render_field_value(value: t.Any) -> t.Any:
        """Renders a single value; values that are neither expressions nor strings containing "@" are returned as is."""
        if isinstance(value, exp.Expr) or (isinstance(value, str) and "@" in value):
            expression = exp.maybe_parse(value, dialect=dialect)
            rendered_expr = render_expression(
                expression=expression,
                module_path=module_path,
                macros=macros,
                jinja_macros=jinja_macros,
                variables=variables,
                path=path,
                dialect=dialect,
                default_catalog=default_catalog,
                blueprint_variables=blueprint_variables,
            )
            if not rendered_expr:
                raise SQLMeshError(
                    f"Rendering `{expression.sql(dialect=dialect)}` did not return an expression"
                )

            if len(rendered_expr) != 1:
                raise SQLMeshError(
                    f"Rendering `{expression.sql(dialect=dialect)}` must return one result, but got {len(rendered_expr)}"
                )

            # For cases where a property is conditionally assigned
            if rendered_expr[0].sql().lower() in {"none", "null"}:
                return None

            return rendered_expr[0]

        return value

    for field_name, field_info in ModelMeta.all_field_infos().items():
        # Values are looked up by alias when the field declares one.
        field = field_info.alias or field_name
        field_value = fields.get(field)

        # We don't want to parse python model cron="@..." kwargs (e.g. @daily) into MacroVar
        if (
            field == "cron"
            and isinstance(field_value, str)
            and field_value.lower() in CRON_SHORTCUTS
        ) or field_value is None:
            continue

        # These fields are only parsed here, not rendered.
        if field in RUNTIME_RENDERED_MODEL_FIELDS:
            fields[field] = parse_strings_with_macro_refs(field_value, dialect)
            continue

        if isinstance(field_value, dict):
            rendered_dict = {}
            for key, value in field_value.items():
                if field == "columns":
                    # Both the column name and its type may contain macros.
                    column_name = render_field_value(key)
                    column_type = render_field_value(value)
                    # If column_type is an Expr (from rendering macros), convert to string.
                    # Otherwise, leave it as-is (string) for the validator to parse with the correct dialect.
                    if isinstance(column_type, exp.Expr):
                        column_type = column_type.sql(dialect=dialect)
                    rendered_dict[column_name] = column_type
                elif key in RUNTIME_RENDERED_MODEL_FIELDS:
                    rendered_dict[key] = parse_strings_with_macro_refs(value, dialect)
                elif (
                    # don't parse kind auto_restatement_cron="@..." kwargs (e.g. @daily) into MacroVar
                    key == "auto_restatement_cron"
                    and isinstance(value, str)
                    and value.lower() in CRON_SHORTCUTS
                ):
                    rendered_dict[key] = value
                elif (rendered := render_field_value(value)) is not None:
                    # Entries that render to None are dropped from the dict.
                    rendered_dict[key] = rendered

            if rendered_dict:
                fields[field] = rendered_dict
            else:
                fields.pop(field)
        elif isinstance(field_value, list):
            # Items that render to None are dropped from the list.
            rendered_list = [
                rendered
                for value in field_value
                if (rendered := render_field_value(value)) is not None
            ]
            if rendered_list:
                fields[field] = rendered_list
            else:
                fields.pop(field)
        else:
            rendered_field = render_field_value(field_value)
            if rendered_field is not None:
                fields[field] = rendered_field
            else:
                fields.pop(field)

    return fields
3119
3120
def render_model_defaults(
    defaults: t.Dict[str, t.Any],
    module_path: Path,
    path: t.Optional[Path],
    jinja_macros: t.Optional[JinjaMacroRegistry],
    macros: t.Optional[MacroRegistry],
    dialect: DialectType,
    variables: t.Optional[t.Dict[str, t.Any]],
    default_catalog: t.Optional[str],
) -> t.Dict[str, t.Any]:
    """Renders macros in the model defaults and validates the rendered values.

    Args:
        defaults: The model defaults to render. This dictionary is passed to `render_meta_fields`,
            which modifies it in place.
        module_path: Forwarded to `render_meta_fields`.
        path: Forwarded to `render_meta_fields`.
        jinja_macros: Forwarded to `render_meta_fields`.
        macros: Forwarded to `render_meta_fields`.
        dialect: Forwarded to `render_meta_fields`.
        variables: Forwarded to `render_meta_fields`.
        default_catalog: Forwarded to `render_meta_fields`.

    Returns:
        The rendered defaults, with a string `interval_unit` converted to an `IntervalUnit`.

    Raises:
        ConfigError: If a boolean default did not render to a boolean or if the interval unit is invalid.
    """
    rendered_defaults = render_meta_fields(
        fields=defaults,
        module_path=module_path,
        macros=macros,
        jinja_macros=jinja_macros,
        variables=variables,
        path=path,
        dialect=dialect,
        default_catalog=default_catalog,
    )

    # Validate defaults that have macros are rendered to boolean.
    # A tuple (rather than a set) keeps the validation order deterministic.
    for field_name in ("optimize_query", "allow_partials", "enabled"):
        value = rendered_defaults.get(field_name)
        if value is not None and not isinstance(value, (exp.Boolean, bool)):
            # Name the offending field so that the user knows which default to fix.
            raise ConfigError(
                f"Expected boolean for '{field_name}', got '{value}' ({type(value)}) instead"
            )

    # Validate the 'interval_unit' if present is an Interval Unit
    interval_unit = rendered_defaults.get("interval_unit")
    if isinstance(interval_unit, str):
        try:
            rendered_defaults["interval_unit"] = IntervalUnit(interval_unit)
        except ValueError as e:
            raise ConfigError(f"Invalid interval unit: {interval_unit}") from e

    return rendered_defaults
3157
3158
def parse_defaults_properties(
    defaults: t.Dict[str, t.Any], dialect: DialectType
) -> t.Dict[str, t.Any]:
    """Parses macro-bearing values of the property defaults into expressions.

    For every property group named in `PROPERTIES`, each value whose string form contains the
    SQLMesh macro prefix (and whose key is a string) is replaced in place by the result of
    `exp.maybe_parse`. The same `defaults` dictionary is returned.
    """
    for prop in PROPERTIES:
        group = defaults.get(prop)
        if not group:
            continue

        for key, value in group.items():
            has_macro = d.SQLMESH_MACRO_PREFIX in str(value)
            if isinstance(key, str) and has_macro:
                defaults[prop][key] = exp.maybe_parse(value, dialect=dialect)

    return defaults
3169
3170
def render_expression(
    expression: exp.Expr,
    module_path: Path,
    path: t.Optional[Path],
    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
    macros: t.Optional[MacroRegistry] = None,
    dialect: DialectType = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    default_catalog: t.Optional[str] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Optional[t.List[exp.Expr]]:
    """Renders a single expression with an `ExpressionRenderer`.

    A Python environment is built for the expression first, falling back to the default macro
    registry when `macros` is not provided. Identifiers are neither quoted nor normalized.

    Returns:
        Whatever `ExpressionRenderer.render()` returns for the expression.
    """
    macro_registry = macros or macro.get_registry()
    python_env = make_python_env(
        expressions=expression,
        jinja_macro_references=None,
        module_path=module_path,
        macros=macro_registry,
        variables=variables,
        path=path,
        blueprint_variables=blueprint_variables,
    )

    renderer = ExpressionRenderer(
        expression,
        dialect,
        [],
        path=path,
        jinja_macro_registry=jinja_macros,
        python_env=python_env,
        default_catalog=default_catalog,
        quote_identifiers=False,
        normalize_identifiers=False,
    )
    return renderer.render()
3202
3203
# Maps a meta field name to the callable that converts the field's value.
# NOTE(review): presumably used when rendering a model definition back into expressions - confirm against callers.
META_FIELD_CONVERTER: t.Dict[str, t.Callable] = {
    # Plain values rendered as string literals.
    "start": lambda value: exp.Literal.string(value),
    "cron": lambda value: exp.Literal.string(value),
    "cron_tz": lambda value: exp.Literal.string(value),
    # A single expression stays as is, several are wrapped in a tuple.
    "partitioned_by_": _single_expr_or_tuple,
    "clustered_by": _single_expr_or_tuple,
    # Sorted so that the output is stable.
    "depends_on_": lambda value: exp.Tuple(expressions=sorted(value)),
    "pre": _list_of_calls_to_exp,
    "post": _list_of_calls_to_exp,
    "audits": _list_of_calls_to_exp,
    "columns_to_types_": lambda value: exp.Schema(
        expressions=[
            exp.ColumnDef(this=exp.to_column(column), kind=data_type)
            for column, data_type in value.items()
        ]
    ),
    "column_descriptions_": lambda value: exp.Schema(
        expressions=[
            exp.to_column(column).eq(description) for column, description in value.items()
        ]
    ),
    "tags": single_value_or_tuple,
    "grains": _refs_to_sql,
    "references": _refs_to_sql,
    # Property groups are passed through untouched.
    "physical_properties_": lambda value: value,
    "virtual_properties_": lambda value: value,
    "session_properties_": lambda value: value,
    "allow_partials": exp.convert,
    # Named signals become function calls, unnamed ones become a tuple of `key = value` pairs.
    "signals": lambda values: exp.tuple_(
        *(
            exp.func(
                signal_name,
                *(
                    exp.PropertyEQ(this=exp.var(arg_name), expression=arg_value)
                    for arg_name, arg_value in signal_args.items()
                ),
            )
            if signal_name
            else exp.Tuple(
                expressions=[
                    exp.var(arg_name).eq(arg_value) for arg_name, arg_value in signal_args.items()
                ]
            )
            for signal_name, signal_args in values
        )
    ),
    "formatting": str,
    "optimize_query": str,
    "virtual_environment_mode": lambda value: exp.Literal.string(value.value),
    "dbt_node_info_": lambda value: value.to_expression(),
    "grants_": lambda value: value,
    "grants_target_layer": lambda value: exp.Literal.string(value.value),
}
3244
3245
def get_model_name(path: Path) -> str:
    """Derives a dotted model name from a file path located under a `models` directory.

    The name consists of the directories that follow the first `models` path component plus the
    file's stem, truncated to the last three parts.
    """
    parts = path.parts
    first_after_models = parts.index("models") + 1
    name_parts = [*parts[first_after_models:-1], path.stem]
    return ".".join(name_parts[-3:])
3249
3250
# function applied to time column when automatically used for partitioning in INCREMENTAL_BY_TIME_RANGE models
def clickhouse_partition_func(
    column: exp.Expr, columns_to_types: t.Optional[t.Dict[str, exp.DataType]]
) -> exp.Expr:
    """Wraps the column into ClickHouse's `toMonday()`, casting the input / output when needed.

    Args:
        column: The column expression to wrap.
        columns_to_types: The optional mapping of column names to types used to look up the
            column's type. The type is treated as UNKNOWN when it can't be found.

    Returns:
        The `toMonday()` call, cast back to the column's type if that type is known but is not
        one of Date, Date32, DateTime or DateTime64.
    """
    declared_type = columns_to_types.get(column.name) if columns_to_types else None
    col_type = declared_type or exp.DataType.build("UNKNOWN")

    def to_monday(arg: exp.Expr) -> exp.Expr:
        return exp.func("toMonday", arg, dialect="clickhouse")

    def as_datetime64() -> exp.Expr:
        target_type = exp.DataType.build("DateTime64(9, 'UTC')", dialect="clickhouse")
        return exp.cast(column, target_type)

    # `toMonday()` function accepts a Date or DateTime type column, so such a column is passed as is
    if col_type.is_type(
        exp.DataType.Type.DATE,
        exp.DataType.Type.DATE32,
        exp.DataType.Type.DATETIME,
        exp.DataType.Type.DATETIME64,
    ):
        return to_monday(column)

    # the column type is not known: cast the input to DateTime64
    if col_type.is_type(exp.DataType.Type.UNKNOWN):
        return to_monday(as_datetime64())

    # the column type is known but not conformable: cast the input to DateTime64 and the output
    # back to the original type
    return exp.cast(to_monday(as_datetime64()), col_type)
3288
3289
# Registry of functions applied to the time column, keyed by what appears to be the dialect name
# (only "clickhouse" is registered here) - TODO confirm the key semantics against the lookup site.
TIME_COL_PARTITION_FUNC = {"clickhouse": clickhouse_partition_func}

logger = <Logger sqlmesh.core.model.definition (WARNING)>

PROPERTIES = {'session_properties', 'virtual_properties', 'physical_properties'}

RUNTIME_RENDERED_MODEL_FIELDS = {'session_properties', 'virtual_properties', 'physical_properties', 'signals', 'merge_filter', 'audits'}

CRON_SHORTCUTS = {'@daily', '@weekly', '@midnight', '@monthly', '@hourly', '@annually', '@yearly'}

class PythonModel(_Model): View Source

class PythonModel(_Model):
    """The model definition which relies on a Python script to fetch the data.

    Args:
        entrypoint: The name of a Python function which contains the data fetching / transformation logic.
    """

    kind: ModelKind = FullKind()
    entrypoint: str
    source_type: t.Literal["python"] = "python"

    def validate_definition(self) -> None:
        """Validates the definition and rejects kinds that don't support Python models."""
        super().validate_definition()

        if self.kind and not self.kind.supports_python_models:
            raise_config_error(
                f"Cannot create Python model '{self.name}' as the '{self.kind.name}' kind doesn't support Python models",
                self._path,
            )

    def render(
        self,
        *,
        context: ExecutionContext,
        start: t.Optional[TimeLike] = None,
        end: t.Optional[TimeLike] = None,
        execution_time: t.Optional[TimeLike] = None,
        **kwargs: t.Any,
    ) -> t.Iterator[QueryOrDF]:
        """Calls the model's entrypoint function and yields what it produces.

        Args:
            context: The execution context passed to the entrypoint (with variables attached).
            start: The start date/time of the run. Defaults to epoch.
            end: The end date/time of the run. Defaults to epoch.
            execution_time: The date/time reference to use for execution time. Defaults to epoch.
            kwargs: Additional keyword arguments forwarded to the entrypoint. An optional
                "variables" entry is merged over the variables stored in the Python environment.

        Returns:
            A generator which yields the entrypoint's results. A result that is not a generator
            is yielded as a single item.

        Raises:
            PythonModelEvalError: If the entrypoint (or iterating over its results) fails.
        """
        env = prepare_env(self.python_env)
        start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH, self.dialect)
        execution_time = to_datetime(execution_time or c.EPOCH)

        # Later sources take precedence: explicitly provided variables override the stored ones.
        variables = {
            **env.get(c.SQLMESH_VARS, {}),
            **env.get(c.SQLMESH_VARS_METADATA, {}),
            **kwargs.pop("variables", {}),
        }
        blueprint_variables = {
            k: d.parse_one(v.sql, dialect=self.dialect) if isinstance(v, SqlValue) else v
            for k, v in {
                **env.get(c.SQLMESH_BLUEPRINT_VARS, {}),
                **env.get(c.SQLMESH_BLUEPRINT_VARS_METADATA, {}),
            }.items()
        }
        try:
            kwargs = {
                **variables,
                **kwargs,
                "start": start,
                "end": end,
                "execution_time": execution_time,
                "latest": execution_time,  # TODO: Preserved for backward compatibility. Remove in 1.0.0.
            }
            df_or_iter = env[self.entrypoint](
                context=context.with_variables(variables, blueprint_variables=blueprint_variables),
                **kwargs,
            )

            if not isinstance(df_or_iter, types.GeneratorType):
                df_or_iter = [df_or_iter]

            for df in df_or_iter:
                yield df
        except Exception as e:
            # Chain explicitly so that the original error is recorded as the direct cause.
            raise PythonModelEvalError(
                format_evaluated_code_exception(e, self.python_env)
            ) from e

    def render_definition(
        self,
        include_python: bool = True,
        include_defaults: bool = False,
        render_query: bool = False,
    ) -> t.List[exp.Expr]:
        """Renders the model definition, always including the Python code."""
        # Ignore the provided value for the include_python flag, since the Python model's
        # definition without Python code is meaningless.
        return super().render_definition(
            include_python=True, include_defaults=include_defaults, render_query=render_query
        )

    @property
    def is_python(self) -> bool:
        return True

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Always returns None, i.e. the nature of the change is reported as undetermined."""
        return None

    @property
    def _data_hash_values_no_sql(self) -> t.List[str]:
        # The entrypoint is appended to the values inherited from the base class.
        data = super()._data_hash_values_no_sql
        data.append(self.entrypoint)
        return data

The model definition which relies on a Python script to fetch the data.

Arguments:

entrypoint: The name of a Python function which contains the data fetching / transformation logic.

entrypoint: str

source_type: Literal['python']

def validate_definition(self) -> None: View Source

1916    def validate_definition(self) -> None:
1917        super().validate_definition()
1918
1919        if self.kind and not self.kind.supports_python_models:
1920            raise_config_error(
1921                f"Cannot create Python model '{self.name}' as the '{self.kind.name}' kind doesn't support Python models",
1922                self._path,
1923            )

Validates the model's definition.

Raises:

ConfigError

def render( self, *, context: sqlmesh.core.context.ExecutionContext, start: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, end: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, execution_time: Union[datetime.date, datetime.datetime, str, int, float, NoneType] = None, **kwargs: Any) -> Iterator[QueryOrDF]: View Source

1925    def render(
1926        self,
1927        *,
1928        context: ExecutionContext,
1929        start: t.Optional[TimeLike] = None,
1930        end: t.Optional[TimeLike] = None,
1931        execution_time: t.Optional[TimeLike] = None,
1932        **kwargs: t.Any,
1933    ) -> t.Iterator[QueryOrDF]:
1934        env = prepare_env(self.python_env)
1935        start, end = make_inclusive(start or c.EPOCH, end or c.EPOCH, self.dialect)
1936        execution_time = to_datetime(execution_time or c.EPOCH)
1937
1938        variables = {
1939            **env.get(c.SQLMESH_VARS, {}),
1940            **env.get(c.SQLMESH_VARS_METADATA, {}),
1941            **kwargs.pop("variables", {}),
1942        }
1943        blueprint_variables = {
1944            k: d.parse_one(v.sql, dialect=self.dialect) if isinstance(v, SqlValue) else v
1945            for k, v in {
1946                **env.get(c.SQLMESH_BLUEPRINT_VARS, {}),
1947                **env.get(c.SQLMESH_BLUEPRINT_VARS_METADATA, {}),
1948            }.items()
1949        }
1950        try:
1951            kwargs = {
1952                **variables,
1953                **kwargs,
1954                "start": start,
1955                "end": end,
1956                "execution_time": execution_time,
1957                "latest": execution_time,  # TODO: Preserved for backward compatibility. Remove in 1.0.0.
1958            }
1959            df_or_iter = env[self.entrypoint](
1960                context=context.with_variables(variables, blueprint_variables=blueprint_variables),
1961                **kwargs,
1962            )
1963
1964            if not isinstance(df_or_iter, types.GeneratorType):
1965                df_or_iter = [df_or_iter]
1966
1967            for df in df_or_iter:
1968                yield df
1969        except Exception as e:
1970            raise PythonModelEvalError(format_evaluated_code_exception(e, self.python_env))

Renders the content of this model as either a SELECT query that fetches the model's data when executed, or a dataframe object that contains the data itself.

The type of the returned object (query or dataframe) depends on whether the model was sourced from a SQL query, a Python script or a pre-built dataset (seed).

Arguments:

context: The execution context used for fetching data.
start: The start date/time of the run.
end: The end date/time of the run.
execution_time: The date/time reference to use for execution time.

Returns:

A generator which yields either a query object or one of the supported dataframe objects.

def render_definition( self, include_python: bool = True, include_defaults: bool = False, render_query: bool = False) -> List[sqlglot.expressions.core.Expr]: View Source

1972    def render_definition(
1973        self,
1974        include_python: bool = True,
1975        include_defaults: bool = False,
1976        render_query: bool = False,
1977    ) -> t.List[exp.Expr]:
1978        # Ignore the provided value for the include_python flag, since the Pyhon model's
1979        # definition without Python code is meaningless.
1980        return super().render_definition(
1981            include_python=True, include_defaults=include_defaults, render_query=render_query
1982        )

Returns the original list of sql expressions comprising the model definition.

Arguments:

include_python: Whether or not to include Python code in the rendered definition.

is_python: bool View Source

1984    @property
1985    def is_python(self) -> bool:
1986        return True

def is_breaking_change( self, previous: Union[SqlModel, SeedModel, PythonModel, ExternalModel]) -> Optional[bool]: View Source

1988    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
1989        return None

Determines whether this model is a breaking change in relation to the previous model.

Arguments:

previous: The previous model to compare against.

Returns:

True if this model instance represents a breaking change, False if it's a non-breaking change and None if the nature of the change can't be determined.

model_config = {'json_encoders': {<class 'sqlglot.expressions.core.Expr'>: <function _expression_encoder>, <class 'sqlglot.expressions.datatypes.DataType'>: <function _expression_encoder>, <class 'sqlglot.expressions.query.Tuple'>: <function _expression_encoder>, typing.Union[sqlglot.expressions.query.Query, sqlmesh.core.dialect.JinjaQuery]: <function _expression_encoder>, typing.Union[sqlglot.expressions.query.Query, sqlmesh.core.dialect.JinjaQuery, sqlmesh.core.dialect.MacroFunc]: <function _expression_encoder>, <class 'datetime.tzinfo'>: <function PydanticModel.<lambda>>}, 'arbitrary_types_allowed': True, 'extra': 'forbid', 'protected_namespaces': (), 'frozen': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None: View Source

def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialize private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.
    Nothing happens if `__pydantic_private__` is already set to a value other than None.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for name, private_attr in self.__private_attributes__.items():
        if private_attr.default_factory_takes_validated_data:
            # The factory sees the validated fields plus the private defaults resolved so far.
            # The merged dict is only built on this branch to avoid needless copies.
            validated_data = {**self.__dict__, **private_values}
            default = private_attr.get_default(
                call_default_factory=True, validated_data=validated_data
            )
        else:
            default = private_attr.get_default(call_default_factory=True)

        if default is not PydanticUndefined:
            private_values[name] = default

    object_setattr(self, '__pydantic_private__', private_values)

This function is meant to behave like a BaseModel method to initialize private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Arguments:

self: The BaseModel instance.
context: The context.

Inherited Members

pydantic.main.BaseModel: BaseModel; model_fields; model_computed_fields; model_extra; model_fields_set; model_construct; model_copy; model_dump; model_dump_json; model_json_schema; model_parametrized_name; model_rebuild; model_validate; model_validate_json; model_validate_strings; parse_file; from_orm; construct; schema; schema_json; validate; update_forward_refs
_Model: python_env; jinja_macros; audit_definitions; mapping_schema; extract_dependencies_from_query; pre_statements_; post_statements_; on_virtual_update_; copy; render_query; render_query_or_raise; render_pre_statements; render_post_statements; render_on_virtual_update; render_audit_query; pre_statements; post_statements; on_virtual_update; macro_definitions; render_signals; render_signal_calls; render_merge_filter; render_physical_properties; render_virtual_properties; render_session_properties; ctas_query; text_diff; set_time_format; convert_to_time_column; set_mapping_schema; update_schema; depends_on; columns_to_types; columns_to_types_or_raise; annotated; sorted_python_env; view_name; schema_name; physical_schema; is_sql; is_seed; depends_on_self; forward_only; disable_restatement; auto_restatement_intervals; auto_restatement_cron; auto_restatement_croniter; wap_supported; is_metadata_only_change; data_hash; audit_metadata_hash; metadata_hash; is_model; grants_table_type; full_depends_on; partitioned_by; partition_interval_unit; audits_with_args; violated_rules_for_query
sqlmesh.core.model.meta.ModelMeta: dialect; name; retention; table_format; storage_format; partitioned_by_; clustered_by; default_catalog; depends_on_; columns_to_types_; column_descriptions_; audits; grains; references; physical_schema_override; physical_properties_; virtual_properties_; session_properties_; allow_partials; signals; enabled; physical_version; gateway; optimize_query; ignored_rules_; formatting; virtual_environment_mode; grants_; grants_target_layer; ignored_rules_validator; session_properties_validator; time_column; unique_key; column_descriptions; lookback; lookback_start; batch_size; batch_concurrency; physical_properties; virtual_properties; session_properties; custom_materialization_properties; grants; all_references; on; managed_columns; when_matched; merge_filter; catalog; fully_qualified_table; fqn; on_destructive_change; on_additive_change; ignored_rules
sqlmesh.core.node._Node: project; description; owner; start; end; cron; cron_tz; interval_unit_; tags; stamp; dbt_node_info_; interval_unit; is_data_change; croniter; cron_next; cron_prev; cron_floor; is_audit; dbt_node_info
sqlmesh.core.node.DbtInfoMixin: dbt_unique_id; dbt_fqn
sqlmesh.utils.pydantic.PydanticModel: dict; json; fields_set; parse_obj; parse_raw; missing_required_fields; extra_fields; all_fields; all_field_infos; required_fields

class ExternalModel(_Model): View Source

class ExternalModel(_Model):
    """The model definition which represents an external source/table."""

    kind: ModelKind = ExternalKind()
    source_type: t.Literal["external"] = "external"

    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
        """Reports a non-breaking change only if every previous column is kept with the same type.

        Returns:
            False if `previous` is an external model and all of its (column, type) pairs are
            still present in this model, None otherwise.
        """
        if isinstance(previous, ExternalModel):
            removed_or_changed = (
                previous.columns_to_types_or_raise.items() - self.columns_to_types_or_raise.items()
            )
            if not removed_or_changed:
                return False
        return None

    @property
    def depends_on(self) -> t.Set[str]:
        """Always an empty set for an external model."""
        return set()

    @property
    def depends_on_self(self) -> bool:
        """Always False for an external model."""
        return False

The model definition which represents an external source/table.

source_type: Literal['external']

def is_breaking_change( self, previous: Union[SqlModel, SeedModel, PythonModel, ExternalModel]) -> Optional[bool]: View Source

2004    def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
2005        if not isinstance(previous, ExternalModel):
2006            return None
2007        if not previous.columns_to_types_or_raise.items() - self.columns_to_types_or_raise.items():
2008            return False
2009        return None

Determines whether this model is a breaking change in relation to the previous model.

Arguments:

previous: The previous model to compare against.

Returns:

True if this model instance represents a breaking change, False if it's a non-breaking change and None if the nature of the change can't be determined.

depends_on: Set[str] View Source

2011    @property
2012    def depends_on(self) -> t.Set[str]:
2013        return set()

All of the upstream dependencies referenced in the model's query, excluding self references.

Returns:

A set of all the upstream table names.

depends_on_self: bool View Source

2015    @property
2016    def depends_on_self(self) -> bool:
2017        return False

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None: View Source

def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialize private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.
    Nothing happens if `__pydantic_private__` is already set to a value other than None.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for name, private_attr in self.__private_attributes__.items():
        if private_attr.default_factory_takes_validated_data:
            # The factory sees the validated fields plus the private defaults resolved so far.
            # The merged dict is only built on this branch to avoid needless copies.
            validated_data = {**self.__dict__, **private_values}
            default = private_attr.get_default(
                call_default_factory=True, validated_data=validated_data
            )
        else:
            default = private_attr.get_default(call_default_factory=True)

        if default is not PydanticUndefined:
            private_values[name] = default

    object_setattr(self, '__pydantic_private__', private_values)

This function is meant to behave like a BaseModel method to initialize private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Arguments:

self: The BaseModel instance.
context: The context.

Inherited Members

pydantic.main.BaseModel: BaseModel; model_fields; model_computed_fields; model_extra; model_fields_set; model_construct; model_copy; model_dump; model_dump_json; model_json_schema; model_parametrized_name; model_rebuild; model_validate; model_validate_json; model_validate_strings; parse_file; from_orm; construct; schema; schema_json; validate; update_forward_refs
_Model: python_env; jinja_macros; audit_definitions; mapping_schema; extract_dependencies_from_query; pre_statements_; post_statements_; on_virtual_update_; copy; render; render_definition; render_query; render_query_or_raise; render_pre_statements; render_post_statements; render_on_virtual_update; render_audit_query; pre_statements; post_statements; on_virtual_update; macro_definitions; render_signals; render_signal_calls; render_merge_filter; render_physical_properties; render_virtual_properties; render_session_properties; ctas_query; text_diff; set_time_format; convert_to_time_column; set_mapping_schema; update_schema; columns_to_types; columns_to_types_or_raise; annotated; sorted_python_env; view_name; schema_name; physical_schema; is_sql; is_python; is_seed; forward_only; disable_restatement; auto_restatement_intervals; auto_restatement_cron; auto_restatement_croniter; wap_supported; validate_definition; is_metadata_only_change; data_hash; audit_metadata_hash; metadata_hash; is_model; grants_table_type; full_depends_on; partitioned_by; partition_interval_unit; audits_with_args; violated_rules_for_query
sqlmesh.core.model.meta.ModelMeta: dialect; name; retention; table_format; storage_format; partitioned_by_; clustered_by; default_catalog; depends_on_; columns_to_types_; column_descriptions_; audits; grains; references; physical_schema_override; physical_properties_; virtual_properties_; session_properties_; allow_partials; signals; enabled; physical_version; gateway; optimize_query; ignored_rules_; formatting; virtual_environment_mode; grants_; grants_target_layer; ignored_rules_validator; session_properties_validator; time_column; unique_key; column_descriptions; lookback; lookback_start; batch_size; batch_concurrency; physical_properties; virtual_properties; session_properties; custom_materialization_properties; grants; all_references; on; managed_columns; when_matched; merge_filter; catalog; fully_qualified_table; fqn; on_destructive_change; on_additive_change; ignored_rules
sqlmesh.core.node._Node: project; description; owner; start; end; cron; cron_tz; interval_unit_; tags; stamp; dbt_node_info_; interval_unit; is_data_change; croniter; cron_next; cron_prev; cron_floor; is_audit; dbt_node_info
sqlmesh.core.node.DbtInfoMixin: dbt_unique_id; dbt_fqn
sqlmesh.utils.pydantic.PydanticModel: dict; json; fields_set; parse_obj; parse_raw; missing_required_fields; extra_fields; all_fields; all_field_infos; required_fields

Model = typing.Union[SqlModel, SeedModel, PythonModel, ExternalModel]

class AuditResult(sqlmesh.utils.pydantic.PydanticModel): View Source

class AuditResult(PydanticModel):
    """The outcome of a single audit, including whether it was skipped and whether it is blocking."""

    audit: Audit
    """The audit this result is for."""
    audit_args: t.Dict[t.Any, t.Any]
    """Arguments passed to the audit."""
    model: t.Optional[_Model] = None
    """The model this audit is for."""
    count: t.Optional[int] = None
    """The number of records returned by the audit query. This could be None if the audit was skipped."""
    query: t.Optional[exp.Expr] = None
    """The rendered query used by the audit. This could be None if the audit was skipped."""
    skipped: bool = False
    """Whether or not the audit was skipped."""
    blocking: bool = True
    """Whether or not the audit was blocking. This can be overridden by the user."""

!!! abstract "Usage Documentation" Models

A base class for creating Pydantic models.

Attributes:

__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized __init__ [Signature][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom __init__ function.
__pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__: A dictionary containing metadata about generic Pydantic models. The origin and args items map to the [__origin__][genericalias.__origin__] and [__args__][genericalias.__args__] attributes of [generic aliases][types-genericalias], and the parameter item maps to the __parameter__ attribute of generic classes.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [RootModel][pydantic.root_model.RootModel].
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [FieldInfo][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [ComputedFieldInfo][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [extra][pydantic.config.ConfigDict.extra] is set to 'allow'.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.

audit: Union[sqlmesh.core.audit.definition.ModelAudit, sqlmesh.core.audit.definition.StandaloneAudit]

The audit this result is for.

audit_args: Dict[Any, Any]

Arguments passed to the audit.

model: Optional[sqlmesh.core.model.definition._Model]

The model this audit is for.

count: Optional[int]

The number of records returned by the audit query. This could be None if the audit was skipped.

query: Optional[sqlglot.expressions.core.Expr]

The rendered query used by the audit. This could be None if the audit was skipped.

skipped: bool

Whether or not the audit was skipped.

blocking: bool

Whether or not the audit was blocking. This can be overridden by the user.

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

Inherited Members

pydantic.main.BaseModel: BaseModel; model_fields; model_computed_fields; model_extra; model_fields_set; model_construct; model_copy; model_dump; model_dump_json; model_json_schema; model_parametrized_name; model_post_init; model_rebuild; model_validate; model_validate_json; model_validate_strings; parse_file; from_orm; construct; schema; schema_json; validate; update_forward_refs
sqlmesh.utils.pydantic.PydanticModel: dict; json; copy; fields_set; parse_obj; parse_raw; missing_required_fields; extra_fields; all_fields; all_field_infos; required_fields

class EvaluatableSignals(sqlmesh.utils.pydantic.PydanticModel): View Source

class EvaluatableSignals(PydanticModel):
    """The rendered signal calls of a model together with the Python environment needed to evaluate them."""

    signals_to_kwargs: t.Dict[str, t.Dict[str, t.Optional[exp.Expr]]]
    """A mapping of signal names to the kwargs passed to the signal."""
    python_env: t.Dict[str, Executable]
    """The Python environment that should be used to evaluate the rendered signal calls."""
    prepared_python_env: t.Dict[str, t.Any]
    """The prepared Python environment that should be used to evaluate the rendered signal calls."""

!!! abstract "Usage Documentation" Models

A base class for creating Pydantic models.

Attributes:

__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized __init__ [Signature][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom __init__ function.
__pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__: A dictionary containing metadata about generic Pydantic models. The origin and args items map to the [__origin__][genericalias.__origin__] and [__args__][genericalias.__args__] attributes of [generic aliases][types-genericalias], and the parameter item maps to the __parameter__ attribute of generic classes.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [RootModel][pydantic.root_model.RootModel].
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [FieldInfo][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [ComputedFieldInfo][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [extra][pydantic.config.ConfigDict.extra] is set to 'allow'.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.

signals_to_kwargs: Dict[str, Dict[str, Optional[sqlglot.expressions.core.Expr]]]

A mapping of signal names to the kwargs passed to the signal.

python_env: Dict[str, sqlmesh.utils.metaprogramming.Executable]

The Python environment that should be used to evaluate the rendered signal calls.

prepared_python_env: Dict[str, Any]

The prepared Python environment that should be used to evaluate the rendered signal calls.

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

Inherited Members

pydantic.main.BaseModel: BaseModel; model_fields; model_computed_fields; model_extra; model_fields_set; model_construct; model_copy; model_dump; model_dump_json; model_json_schema; model_parametrized_name; model_post_init; model_rebuild; model_validate; model_validate_json; model_validate_strings; parse_file; from_orm; construct; schema; schema_json; validate; update_forward_refs
sqlmesh.utils.pydantic.PydanticModel: dict; json; copy; fields_set; parse_obj; parse_raw; missing_required_fields; extra_fields; all_fields; all_field_infos; required_fields

def create_models_from_blueprints( gateway: Union[str, sqlglot.expressions.core.Expr, NoneType], blueprints: Any, get_variables: Callable[[Optional[str]], Dict[str, str]], loader: Callable[..., Union[SqlModel, SeedModel, PythonModel, ExternalModel]], path: pathlib.Path = PosixPath('.'), module_path: pathlib.Path = PosixPath('.'), dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType] = None, default_catalog_per_gateway: Optional[Dict[str, str]] = None, **loader_kwargs: Any) -> List[Union[SqlModel, SeedModel, PythonModel, ExternalModel]]: View Source

def create_models_from_blueprints(
    gateway: t.Optional[str | exp.Expr],
    blueprints: t.Any,
    get_variables: t.Callable[[t.Optional[str]], t.Dict[str, str]],
    loader: t.Callable[..., Model],
    path: Path = Path(),
    module_path: Path = Path(),
    dialect: DialectType = None,
    default_catalog_per_gateway: t.Optional[t.Dict[str, str]] = None,
    **loader_kwargs: t.Any,
) -> t.List[Model]:
    """Create one model per blueprint by invoking ``loader`` once for each extracted blueprint.

    Args:
        gateway: The gateway of the model. When truthy, it is parsed and rendered separately
            for every blueprint, using that blueprint's variables.
        blueprints: The blueprints definition that is handed to `_extract_blueprints`.
        get_variables: A callable that returns the variables for a gateway name (or None).
        loader: The callable that creates a single model.
        path: The path of the model definition file.
        module_path: The python module path, forwarded to the renderer and the loader.
        dialect: The dialect used to parse and render the gateway, also forwarded to the loader.
        default_catalog_per_gateway: An optional mapping of gateway names to default catalogs.
        loader_kwargs: Additional keyword arguments that are forwarded to the loader.

    Returns:
        The models produced by the loader, in blueprint order.
    """
    base_default_catalog = loader_kwargs.get("default_catalog")
    models: t.List[Model] = []

    for blueprint in _extract_blueprints(blueprints, path):
        blueprint_variables = _extract_blueprint_variables(blueprint, path)

        gateway_name: t.Optional[str] = None
        if gateway:
            rendered_gateway = render_expression(
                expression=exp.maybe_parse(gateway, dialect=dialect),
                module_path=module_path,
                macros=loader_kwargs.get("macros"),
                jinja_macros=loader_kwargs.get("jinja_macros"),
                path=path,
                dialect=dialect,
                default_catalog=base_default_catalog,
                blueprint_variables=blueprint_variables,
            )
            if rendered_gateway:
                gateway_name = rendered_gateway[0].name

        # Every blueprint starts from the default catalog the caller provided.
        default_catalog = base_default_catalog
        if default_catalog_per_gateway and gateway_name:
            # A missing entry yields None on purpose: the gateway exists but has no entry in the
            # dict (e.g., catalog-unsupported engines like ClickHouse), so the default catalog is
            # cleared to keep the primary gateway's global default from leaking into the model's name.
            default_catalog = default_catalog_per_gateway.get(gateway_name)

        blueprint_kwargs = {**loader_kwargs, "default_catalog": default_catalog}
        models.append(
            loader(
                path=path,
                module_path=module_path,
                dialect=dialect,
                variables=get_variables(gateway_name),
                blueprint_variables=blueprint_variables,
                **blueprint_kwargs,
            )
        )

    return models

def load_sql_based_models( expressions: List[sqlglot.expressions.core.Expr], get_variables: Callable[[Optional[str]], Dict[str, str]], path: pathlib.Path = PosixPath('.'), module_path: pathlib.Path = PosixPath('.'), dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType] = None, default_catalog_per_gateway: Optional[Dict[str, str]] = None, **loader_kwargs: Any) -> List[Union[SqlModel, SeedModel, PythonModel, ExternalModel]]: View Source

def load_sql_based_models(
    expressions: t.List[exp.Expr],
    get_variables: t.Callable[[t.Optional[str]], t.Dict[str, str]],
    path: Path = Path(),
    module_path: Path = Path(),
    dialect: DialectType = None,
    default_catalog_per_gateway: t.Optional[t.Dict[str, str]] = None,
    **loader_kwargs: t.Any,
) -> t.List[Model]:
    """Load the models defined by the parsed expressions of a SQL model file, one per blueprint.

    The `gateway` and `blueprints` properties are read from the leading MODEL block (if any).
    The `blueprints` property is detached from the MODEL block, and rendered first when it is
    a macro function call. Model creation is delegated to `create_models_from_blueprints`
    with `load_sql_based_model` as the loader.

    Args:
        expressions: The parsed expressions of the model file; the first one is expected to be
            the MODEL block.
        get_variables: A callable that returns the variables for a gateway name (or None).
        path: The path of the model definition file.
        module_path: The python module path to serialize macros for.
        dialect: The default dialect.
        default_catalog_per_gateway: An optional mapping of gateway names to default catalogs.
        loader_kwargs: Additional keyword arguments that are forwarded to the loader.

    Returns:
        The list of loaded models.
    """
    gateway: t.Optional[exp.Expr] = None
    blueprints: t.Optional[exp.Expr] = None

    model_meta = seq_get(expressions, 0)
    # Iterate over a snapshot of the properties: `prop.pop()` below detaches the `blueprints`
    # property from the MODEL node, and removing an item from a list that is being iterated
    # would skip the property that follows it (e.g. a `gateway` declared after `blueprints`).
    meta_props = list(model_meta.expressions or []) if isinstance(model_meta, d.Model) else []
    for prop in meta_props:
        if prop.name == "gateway":
            gateway = prop.args["value"]
        elif prop.name == "blueprints":
            # We pop the `blueprints` here to avoid walking large lists when rendering the meta
            blueprints = prop.pop().args["value"]

    if isinstance(blueprints, d.MacroFunc):
        rendered_blueprints = render_expression(
            expression=blueprints,
            module_path=module_path,
            macros=loader_kwargs.get("macros"),
            jinja_macros=loader_kwargs.get("jinja_macros"),
            variables=get_variables(None),
            path=path,
            dialect=dialect,
            default_catalog=loader_kwargs.get("default_catalog"),
        )
        if not rendered_blueprints:
            raise_config_error("Failed to render blueprints property", path)

        # Help mypy see that rendered_blueprints can't be None
        assert rendered_blueprints

        # A macro that expands into several expressions is treated as a tuple of blueprints.
        if len(rendered_blueprints) > 1:
            rendered_blueprints = [exp.Tuple(expressions=rendered_blueprints)]

        blueprints = rendered_blueprints[0]

    return create_models_from_blueprints(
        gateway=gateway,
        blueprints=blueprints,
        get_variables=get_variables,
        loader=partial(load_sql_based_model, expressions),
        path=path,
        module_path=module_path,
        dialect=dialect,
        default_catalog_per_gateway=default_catalog_per_gateway,
        **loader_kwargs,
    )

def load_sql_based_model( expressions: List[sqlglot.expressions.core.Expr], *, defaults: Optional[Dict[str, Any]] = None, path: Optional[pathlib.Path] = None, module_path: pathlib.Path = PosixPath('.'), time_column_format: str = '%Y-%m-%d', macros: Optional[sqlmesh.utils.UniqueKeyDict[str, Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]]] = None, jinja_macros: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, audits: Optional[Dict[str, sqlmesh.core.audit.definition.ModelAudit]] = None, python_env: Optional[Dict[str, sqlmesh.utils.metaprogramming.Executable]] = None, dialect: Optional[str] = None, physical_schema_mapping: Optional[Dict[re.Pattern, str]] = None, default_catalog: Optional[str] = None, variables: Optional[Dict[str, Any]] = None, infer_names: Optional[bool] = False, blueprint_variables: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Union[SqlModel, SeedModel, PythonModel, ExternalModel]: View Source

def load_sql_based_model(
    expressions: t.List[exp.Expr],
    *,
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    path: t.Optional[Path] = None,
    module_path: Path = Path(),
    time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT,
    macros: t.Optional[MacroRegistry] = None,
    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
    audits: t.Optional[t.Dict[str, ModelAudit]] = None,
    python_env: t.Optional[t.Dict[str, Executable]] = None,
    dialect: t.Optional[str] = None,
    physical_schema_mapping: t.Optional[t.Dict[re.Pattern, str]] = None,
    default_catalog: t.Optional[str] = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    infer_names: t.Optional[bool] = False,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    **kwargs: t.Any,
) -> Model:
    """Load a model from a parsed SQLMesh model SQL file.

    Args:
        expressions: Model, *Statements, Query. Note that a dummy MODEL node is inserted at the
            front of this list in place when the MODEL block is missing and `infer_names` is set.
        defaults: Definition default values.
        path: An optional path to the model definition file.
        module_path: The python module path to serialize macros for.
        time_column_format: The default time column format to use if no model time column is configured.
            The format must adhere to Python's strftime codes.
        macros: The custom registry of macros. If not provided the default registry will be used.
        jinja_macros: The registry of Jinja macros.
        audits: Model audits keyed by name. NOTE(review): this argument is not referenced in the
            body of this function; confirm whether it is only accepted so that it doesn't end up
            in `kwargs`.
        python_env: The custom Python environment for macros. If not provided the environment will be constructed
            from the macro registry.
        dialect: The default dialect if no model dialect is configured.
        physical_schema_mapping: A mapping of regular expressions to match against the model schema to produce the corresponding physical schema
        default_catalog: The default catalog if no model catalog is configured.
        variables: The variables to pass to the model.
        infer_names: Whether a missing MODEL block is tolerated and a missing model name is
            inferred from `path`.
        blueprint_variables: The blueprint's variables to pass to the model.
        kwargs: Additional kwargs to pass to the loader.

    Returns:
        The result of `create_seed_model` if the model's kind is SEED, otherwise the result of
        `create_sql_model`.

    Raises:
        ValueError: If the name has to be inferred but no `path` was provided.

    Invalid definitions (missing MODEL block or name, a MODEL statement that doesn't render into
    exactly one expression, a per-model `default_catalog`) are reported through `raise_config_error`.
    """
    missing_model_msg = """Please add a MODEL block at the top of the file. Example:

MODEL (
  name sqlmesh_example.full_model, --model name
  kind FULL, --materialization
  cron '@daily', --schedule
);

Learn more at https://sqlmesh.readthedocs.io/en/stable/concepts/models/overview
"""

    if not expressions:
        raise_config_error(missing_model_msg)

    dialect = dialect or ""
    meta = expressions[0]
    if not isinstance(meta, d.Model):
        if not infer_names:
            raise_config_error(missing_model_msg)
        meta = d.Model(expressions=[])  # Dummy meta node
        expressions.insert(0, meta)

    # We deliberately hold off rendering some properties at load time because there is not enough information available
    # at load time to render them. They will get rendered later at evaluation time
    unrendered_properties = {}
    unrendered_merge_filter = None

    for prop in meta.expressions:
        # Macro functions that programmatically generate the key-value pair properties should be rendered
        # This is needed in the odd case where a macro shares the name of one of the properties
        # eg `@session_properties()` Test: `test_macros_in_model_statement` Reference PR: #2574
        if isinstance(prop, d.MacroFunc):
            continue

        prop_name = prop.name.lower()
        if prop_name in {"signals", "audits"} | PROPERTIES:
            unrendered_properties[prop_name] = prop.args.get("value")
        elif (
            prop_name == "kind"
            and (value := prop.args.get("value"))
            and value.name.lower() == "incremental_by_unique_key"
        ):
            for kind_prop in value.expressions:
                if kind_prop.name.lower() == "merge_filter":
                    unrendered_merge_filter = kind_prop

    rendered_meta_exprs = render_expression(
        expression=meta,
        module_path=module_path,
        macros=macros,
        jinja_macros=jinja_macros,
        variables=variables,
        path=path,
        dialect=dialect,
        default_catalog=default_catalog,
        blueprint_variables=blueprint_variables,
    )

    if rendered_meta_exprs is None or len(rendered_meta_exprs) != 1:
        raise_config_error(
            f"Invalid MODEL statement:\n{meta.sql(dialect=dialect, pretty=True)}",
            path,
        )
        # Presumably only here so that type checkers treat this branch as terminating;
        # it is not reached as long as `raise_config_error` raises.
        raise

    rendered_meta = rendered_meta_exprs[0]

    rendered_defaults = (
        render_model_defaults(
            defaults=defaults,
            module_path=module_path,
            macros=macros,
            jinja_macros=jinja_macros,
            variables=variables,
            path=path,
            dialect=dialect,
            default_catalog=default_catalog,
        )
        if defaults
        else {}
    )

    rendered_defaults = parse_defaults_properties(rendered_defaults, dialect=dialect)

    # Extract the query and any pre/post statements
    query_or_seed_insert, pre_statements, post_statements, on_virtual_update, inline_audits = (
        _split_sql_model_statements(expressions[1:], path, dialect=dialect)
    )

    # Later entries win: rendered MODEL properties override the defaults above them, and the
    # caller's kwargs override the MODEL properties.
    meta_fields: t.Dict[str, t.Any] = {
        "dialect": dialect,
        "description": (
            "\n".join(comment.strip() for comment in rendered_meta.comments)
            if rendered_meta.comments
            else None
        ),
        **{prop.name.lower(): prop.args.get("value") for prop in rendered_meta.expressions},
        **kwargs,
    }

    # Discard the potentially half-rendered versions of these properties and replace them with the
    # original unrendered versions. They will get rendered properly at evaluation time
    meta_fields.update(unrendered_properties)

    if unrendered_merge_filter:
        for idx, kind_prop in enumerate(meta_fields["kind"].expressions):
            if kind_prop.name.lower() == "merge_filter":
                meta_fields["kind"].expressions[idx] = unrendered_merge_filter

    if isinstance(meta_fields.get("dialect"), exp.Expr):
        meta_fields["dialect"] = meta_fields["dialect"].name

    # The name of the model will be inferred from its path relative to `models/`, if it's not explicitly specified
    name = meta_fields.pop("name", "")
    if not name and infer_names:
        if path is None:
            raise ValueError(f"Model {name} must have a name")
        name = get_model_name(path)

    if not name:
        raise_config_error(
            "Please add the required 'name' field to the MODEL block at the top of the file.\n\n"
            + "Learn more at https://sqlmesh.readthedocs.io/en/stable/concepts/models/overview"
        )
    if "default_catalog" in meta_fields:
        raise_config_error(
            "`default_catalog` cannot be set on a per-model basis. It must be set at the connection level.",
            path,
        )

    common_kwargs = dict(
        pre_statements=pre_statements,
        post_statements=post_statements,
        on_virtual_update=on_virtual_update,
        defaults=rendered_defaults,
        path=path,
        module_path=module_path,
        macros=macros,
        python_env=python_env,
        jinja_macros=jinja_macros,
        physical_schema_mapping=physical_schema_mapping,
        default_catalog=default_catalog,
        variables=variables,
        inline_audits=inline_audits,
        blueprint_variables=blueprint_variables,
        use_original_sql=True,
        **meta_fields,
    )

    # Fall back to the default declared on the `kind` field of ModelMeta when the MODEL block has none.
    kind = common_kwargs.pop("kind", ModelMeta.all_field_infos()["kind"].default)

    if kind.name != ModelKindName.SEED:
        return create_sql_model(
            name,
            query_or_seed_insert,
            kind=kind,
            time_column_format=time_column_format,
            **common_kwargs,
        )

    seed_properties = {p.name.lower(): p.args.get("value") for p in kind.expressions}
    return create_seed_model(
        name,
        SeedKind(**seed_properties),
        **common_kwargs,
    )

Load a model from a parsed SQLMesh model SQL file.

Arguments:

expressions: Model, *Statements, Query.
defaults: Definition default values.
path: An optional path to the model definition file.
module_path: The python module path to serialize macros for.
time_column_format: The default time column format to use if no model time column is configured. The format must adhere to Python's strftime codes.
macros: The custom registry of macros. If not provided the default registry will be used.
jinja_macros: The registry of Jinja macros.
python_env: The custom Python environment for macros. If not provided the environment will be constructed from the macro registry.
dialect: The default dialect if no model dialect is configured.
physical_schema_mapping: A mapping of regular expressions to match against the model schema to produce the corresponding physical schema
default_catalog: The default catalog if no model catalog is configured.
variables: The variables to pass to the model.
kwargs: Additional kwargs to pass to the loader.

def create_sql_model( name: Union[str, sqlglot.expressions.query.Table], query: Optional[sqlglot.expressions.core.Expr], **kwargs: Any) -> Union[SqlModel, SeedModel, PythonModel, ExternalModel]: View Source

def create_sql_model(
    name: TableName,
    query: t.Optional[exp.Expr],
    **kwargs: t.Any,
) -> Model:
    """Creates a SQL model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        query: The model's logic in a form of a SELECT query.
        kwargs: Additional keyword arguments forwarded to `_create_model`; the optional
            `path` entry is also used as the location of the reported error.

    Returns:
        The model created by `_create_model` for the `SqlModel` class.
    """
    supported_query_types = (exp.Query, d.JinjaQuery, d.MacroFunc)

    is_supported_query = isinstance(query, supported_query_types)
    if not is_supported_query:
        raise_config_error(
            "A query is required and must be a SELECT statement, a UNION statement, or a JINJA_QUERY block",
            kwargs.get("path"),
        )
        # Only reached if the call above returns; guards against continuing with a bad query.
        assert isinstance(query, supported_query_types)

    return _create_model(SqlModel, name, query=query, **kwargs)

Creates a SQL model.

Arguments:

name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
query: The model's logic in a form of a SELECT query.

def create_seed_model( name: Union[str, sqlglot.expressions.query.Table], seed_kind: sqlmesh.core.model.kind.SeedKind, *, path: Optional[pathlib.Path] = None, module_path: pathlib.Path = PosixPath('.'), **kwargs: Any) -> Union[SqlModel, SeedModel, PythonModel, ExternalModel]: View Source

def create_seed_model(
    name: TableName,
    seed_kind: SeedKind,
    *,
    path: t.Optional[Path] = None,
    module_path: Path = Path(),
    **kwargs: t.Any,
) -> Model:
    """Creates a Seed model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        seed_kind: The information about the location of a seed and other related configuration.
        path: An optional path to the model definition file. Relative seed paths are resolved
            against it (or against its parent directory if it is not a directory).
        module_path: The directory that a seed path starting with `$root` is resolved against.
        kwargs: Additional keyword arguments forwarded to `_create_model`.

    Returns:
        The model created by `_create_model` for the `SeedModel` class.
    """
    seed_path = Path(seed_kind.path)
    marker, *subdirs = seed_path.parts
    if marker.lower() == "$root":
        # `$root/...` paths are anchored at the module path, and only in this case is the
        # resolved location written back to the seed kind.
        seed_path = module_path.joinpath(*subdirs)
        seed_kind.path = str(seed_path)
    elif not seed_path.is_absolute() and path is not None:
        # Relative paths are anchored at the directory of the model definition file. Without
        # a model path the seed path is used as is.
        base_dir = path if path.is_dir() else path.parent
        seed_path = base_dir / seed_path

    seed = create_seed(seed_path)

    return _create_model(
        SeedModel,
        name,
        path=path,
        seed=seed,
        kind=seed_kind,
        depends_on=kwargs.pop("depends_on", None),
        module_path=module_path,
        **kwargs,
    )

Creates a Seed model.

Arguments:

name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
seed_kind: The information about the location of a seed and other related configuration.
path: An optional path to the model definition file.

def create_python_model( name: str, entrypoint: str, python_env: Dict[str, sqlmesh.utils.metaprogramming.Executable], *, macros: Optional[sqlmesh.utils.UniqueKeyDict[str, Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]]] = None, jinja_macros: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, path: pathlib.Path = PosixPath('.'), module_path: pathlib.Path = PosixPath('.'), depends_on: Optional[Set[str]] = None, variables: Optional[Dict[str, Any]] = None, blueprint_variables: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Union[SqlModel, SeedModel, PythonModel, ExternalModel]: View Source

def create_python_model(
    name: str,
    entrypoint: str,
    python_env: t.Dict[str, Executable],
    *,
    macros: t.Optional[MacroRegistry] = None,
    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
    path: Path = Path(),
    module_path: Path = Path(),
    depends_on: t.Optional[t.Set[str]] = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    **kwargs: t.Any,
) -> Model:
    """Creates a Python model.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        entrypoint: The name of a Python function which contains the data fetching / transformation logic.
        python_env: The Python environment of all objects referenced by the model implementation.
            The referenced variables are stored in it under `c.SQLMESH_VARS`, i.e. it is modified in place.
        macros: The registry of macros used to render explicitly provided dependencies.
        jinja_macros: The registry of Jinja macros used to render explicitly provided dependencies.
        path: An optional path to the model definition file.
        module_path: The python module path, forwarded to the renderer and to `_create_model`.
        depends_on: The custom set of model's upstream dependencies.
        variables: The variables to pass to the model.
        blueprint_variables: The blueprint's variables to pass to the model.
        kwargs: Additional keyword arguments forwarded to `_create_model`; `dialect` and
            `default_catalog` are also read from it here.

    Returns:
        The model created by `_create_model` for the `PythonModel` class.
    """
    dialect = kwargs.get("dialect")

    # When no dependencies were provided explicitly they are discovered by parsing the code
    infer_dependencies = depends_on is None

    if python_env is None:
        discovered_deps, referenced_variables = set(), set()
    else:
        discovered_deps, referenced_variables = parse_dependencies(
            python_env,
            entrypoint,
            strict_resolution=infer_dependencies,
            variables=variables,
            blueprint_variables=blueprint_variables,
        )

    if infer_dependencies:
        # Self-references that were found are removed
        depends_on = discovered_deps - {name}
    else:
        # Explicit dependencies are rendered together as a single array expression
        dependencies_array = exp.Array(
            expressions=[exp.maybe_parse(dep, dialect=dialect) for dep in depends_on or []]
        )
        rendered_dependencies = render_expression(
            expression=dependencies_array,
            module_path=module_path,
            macros=macros,
            jinja_macros=jinja_macros,
            variables=variables,
            path=path,
            dialect=dialect,
            default_catalog=kwargs.get("default_catalog"),
        )
        rendered_array = t.cast(t.List[exp.Expr], rendered_dependencies)[0]
        depends_on = {dep.sql(dialect=dialect) for dep in rendered_array.expressions}

    # Only the variables that the model's code actually references are kept
    used_variables = {
        var_name: var_value
        for var_name, var_value in (variables or {}).items()
        if var_name in referenced_variables
    }
    if used_variables:
        python_env[c.SQLMESH_VARS] = Executable.value(used_variables, sort_root_dict=True)

    return _create_model(
        PythonModel,
        name,
        path=path,
        depends_on=depends_on,
        entrypoint=entrypoint,
        python_env=python_env,
        macros=macros,
        jinja_macros=jinja_macros,
        module_path=module_path,
        variables=variables,
        blueprint_variables=blueprint_variables,
        **kwargs,
    )

Creates a Python model.

Arguments:

name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
entrypoint: The name of a Python function which contains the data fetching / transformation logic.
python_env: The Python environment of all objects referenced by the model implementation.
path: An optional path to the model definition file.
depends_on: The custom set of model's upstream dependencies.
variables: The variables to pass to the model.
blueprint_variables: The blueprint's variables to pass to the model.

def create_external_model( name: Union[str, sqlglot.expressions.query.Table], *, dialect: Optional[str] = None, path: pathlib.Path = PosixPath('.'), defaults: Optional[Dict[str, Any]] = None, **kwargs: Any) -> ExternalModel: View Source

def create_external_model(
    name: TableName,
    *,
    dialect: t.Optional[str] = None,
    path: Path = Path(),
    defaults: t.Optional[t.Dict[str, t.Any]] = None,
    **kwargs: t.Any,
) -> ExternalModel:
    """Creates an external model.

    This is a thin wrapper around `_create_model` that always passes
    `ExternalModel` as the model class and `ModelKindName.EXTERNAL.value`
    as the kind.

    Args:
        name: The name of the model, which is of the form [catalog].[db].table.
            The catalog and db are optional.
        dialect: The dialect to serialize.
        path: An optional path to the model definition file.
        defaults: Optional defaults, forwarded to `_create_model` unchanged.
        kwargs: Any additional keyword arguments, forwarded to `_create_model`.

    Returns:
        The result of `_create_model`, typed as an `ExternalModel`.
    """
    external_model = _create_model(
        ExternalModel,
        name,
        defaults=defaults,
        dialect=dialect,
        path=path,
        kind=ModelKindName.EXTERNAL.value,
        **kwargs,
    )
    # `t.cast` is a no-op at runtime; it only narrows the type for type checkers.
    return t.cast(ExternalModel, external_model)

Creates an external model.

Arguments:

name: The name of the model, which is of the form [catalog].[db].table. The catalog and db are optional.
dialect: The dialect to serialize.
path: An optional path to the model definition file.

INSERT_SEED_MACRO_CALL = MacroFunc( this=Anonymous(this=INSERT_SEED))

def render_meta_fields( fields: Dict[str, Any], module_path: pathlib.Path, path: Optional[pathlib.Path], jinja_macros: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry], macros: Optional[sqlmesh.utils.UniqueKeyDict[str, Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]]], dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType], variables: Optional[Dict[str, Any]], default_catalog: Optional[str], blueprint_variables: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: View Source

def render_meta_fields(
    fields: t.Dict[str, t.Any],
    module_path: Path,
    path: t.Optional[Path],
    jinja_macros: t.Optional[JinjaMacroRegistry],
    macros: t.Optional[MacroRegistry],
    dialect: DialectType,
    variables: t.Optional[t.Dict[str, t.Any]],
    default_catalog: t.Optional[str],
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Dict[str, t.Any]:
    """Renders macro references found in the values of model meta fields.

    Every field declared on `ModelMeta` is looked up in `fields` (by its alias
    when it has one). Expressions, and strings containing "@", are rendered with
    `render_expression`; all other values are kept as they are. `fields` is
    updated in place: entries whose rendered value ends up empty / None are
    removed from it.

    Args:
        fields: The meta field values keyed by field name. Mutated in place.
        module_path: Forwarded to `render_expression`.
        path: Forwarded to `render_expression`.
        jinja_macros: Forwarded to `render_expression`.
        macros: Forwarded to `render_expression`.
        dialect: The dialect used for parsing values and generating SQL.
        variables: Forwarded to `render_expression`.
        default_catalog: Forwarded to `render_expression`.
        blueprint_variables: Forwarded to `render_expression`.

    Returns:
        The same `fields` dictionary that was passed in.

    Raises:
        SQLMeshError: If rendering a value yields nothing, or more than one expression.
    """

    def _is_cron_shortcut(candidate: t.Any) -> bool:
        # Cron shortcuts (e.g. @daily) must stay plain strings, not become a MacroVar.
        return isinstance(candidate, str) and candidate.lower() in CRON_SHORTCUTS

    def _render_value(raw: t.Any) -> t.Any:
        is_renderable = isinstance(raw, exp.Expr) or (isinstance(raw, str) and "@" in raw)
        if not is_renderable:
            return raw

        expression = exp.maybe_parse(raw, dialect=dialect)
        rendered_exprs = render_expression(
            expression=expression,
            module_path=module_path,
            macros=macros,
            jinja_macros=jinja_macros,
            variables=variables,
            path=path,
            dialect=dialect,
            default_catalog=default_catalog,
            blueprint_variables=blueprint_variables,
        )
        if not rendered_exprs:
            raise SQLMeshError(
                f"Rendering `{expression.sql(dialect=dialect)}` did not return an expression"
            )

        if len(rendered_exprs) != 1:
            raise SQLMeshError(
                f"Rendering `{expression.sql(dialect=dialect)}` must return one result, but got {len(rendered_exprs)}"
            )

        result = rendered_exprs[0]
        # For cases where a property is conditionally assigned
        if result.sql().lower() in {"none", "null"}:
            return None
        return result

    def _render_mapping(owner: str, mapping: t.Dict[t.Any, t.Any]) -> t.Dict[t.Any, t.Any]:
        output: t.Dict[t.Any, t.Any] = {}
        for key, value in mapping.items():
            if owner == "columns":
                column_name = _render_value(key)
                column_type = _render_value(value)
                # A type produced by macro rendering is an Expr and is turned back into SQL;
                # a plain string is left for the validator to parse with the correct dialect.
                if isinstance(column_type, exp.Expr):
                    column_type = column_type.sql(dialect=dialect)
                output[column_name] = column_type
                continue

            if key in RUNTIME_RENDERED_MODEL_FIELDS:
                output[key] = parse_strings_with_macro_refs(value, dialect)
                continue

            # don't parse kind auto_restatement_cron="@..." kwargs (e.g. @daily) into MacroVar
            if key == "auto_restatement_cron" and _is_cron_shortcut(value):
                output[key] = value
                continue

            rendered_value = _render_value(value)
            if rendered_value is not None:
                output[key] = rendered_value
        return output

    for attr_name, info in ModelMeta.all_field_infos().items():
        field = info.alias or attr_name
        current = fields.get(field)

        # We don't want to parse python model cron="@..." kwargs (e.g. @daily) into MacroVar
        if current is None or (field == "cron" and _is_cron_shortcut(current)):
            continue

        if field in RUNTIME_RENDERED_MODEL_FIELDS:
            fields[field] = parse_strings_with_macro_refs(current, dialect)
            continue

        rendered: t.Any
        if isinstance(current, dict):
            rendered = _render_mapping(field, current)
            keep = bool(rendered)
        elif isinstance(current, list):
            rendered = [item for item in map(_render_value, current) if item is not None]
            keep = bool(rendered)
        else:
            rendered = _render_value(current)
            keep = rendered is not None

        if keep:
            fields[field] = rendered
        else:
            fields.pop(field)

    return fields

def render_model_defaults( defaults: Dict[str, Any], module_path: pathlib.Path, path: Optional[pathlib.Path], jinja_macros: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry], macros: Optional[sqlmesh.utils.UniqueKeyDict[str, Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]]], dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType], variables: Optional[Dict[str, Any]], default_catalog: Optional[str]) -> Dict[str, Any]: View Source

def render_model_defaults(
    defaults: t.Dict[str, t.Any],
    module_path: Path,
    path: t.Optional[Path],
    jinja_macros: t.Optional[JinjaMacroRegistry],
    macros: t.Optional[MacroRegistry],
    dialect: DialectType,
    variables: t.Optional[t.Dict[str, t.Any]],
    default_catalog: t.Optional[str],
) -> t.Dict[str, t.Any]:
    """Renders the macros in the model defaults and validates the rendered values.

    Args:
        defaults: The model defaults keyed by field name. Passed to
            `render_meta_fields`, which updates the dictionary in place.
        module_path: Forwarded to `render_meta_fields`.
        path: Forwarded to `render_meta_fields`.
        jinja_macros: Forwarded to `render_meta_fields`.
        macros: Forwarded to `render_meta_fields`.
        dialect: Forwarded to `render_meta_fields`.
        variables: Forwarded to `render_meta_fields`.
        default_catalog: Forwarded to `render_meta_fields`.

    Returns:
        The rendered defaults. A string `interval_unit` is replaced by the
        corresponding `IntervalUnit`.

    Raises:
        ConfigError: If `optimize_query`, `allow_partials` or `enabled` is set but is
            not a boolean, or if `interval_unit` is a string that is not a valid
            `IntervalUnit`.
    """
    rendered_defaults = render_meta_fields(
        fields=defaults,
        module_path=module_path,
        macros=macros,
        jinja_macros=jinja_macros,
        variables=variables,
        path=path,
        dialect=dialect,
        default_catalog=default_catalog,
    )

    # Validate defaults that have macros are rendered to boolean.
    # A tuple (rather than a set) keeps the order in which failures are reported stable.
    for boolean in ("optimize_query", "allow_partials", "enabled"):
        var = rendered_defaults.get(boolean)
        if var is not None and not isinstance(var, (exp.Boolean, bool)):
            # Name the offending default as well as its value so the user knows what to fix.
            raise ConfigError(
                f"Expected boolean for '{var}', got '{type(var)}' instead "
                f"(model default '{boolean}')"
            )

    # Validate the 'interval_unit' if present is an Interval Unit
    var = rendered_defaults.get("interval_unit")
    if isinstance(var, str):
        try:
            rendered_defaults["interval_unit"] = IntervalUnit(var)
        except ValueError as e:
            raise ConfigError(f"Invalid interval unit: {var}") from e

    return rendered_defaults

def parse_defaults_properties( defaults: Dict[str, Any], dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Dict[str, Any]: View Source

def parse_defaults_properties(
    defaults: t.Dict[str, t.Any], dialect: DialectType
) -> t.Dict[str, t.Any]:
    """Parses macro-bearing property values in the model defaults into expressions.

    For each property group named in `PROPERTIES`, every value stored under a
    string key whose string form contains the SQLMesh macro prefix is replaced by
    the result of `exp.maybe_parse`. The nested dictionaries are updated in place.

    Args:
        defaults: The model defaults keyed by field name.
        dialect: The dialect used to parse the values.

    Returns:
        The same `defaults` dictionary that was passed in.
    """
    for prop in PROPERTIES:
        properties = defaults.get(prop)
        if not properties:
            continue

        for key, value in properties.items():
            if not isinstance(key, str):
                continue
            if d.SQLMESH_MACRO_PREFIX in str(value):
                # Reassigning an existing key is safe while iterating over the items.
                properties[key] = exp.maybe_parse(value, dialect=dialect)

    return defaults

def render_expression( expression: sqlglot.expressions.core.Expr, module_path: pathlib.Path, path: Optional[pathlib.Path], jinja_macros: Optional[sqlmesh.utils.jinja.JinjaMacroRegistry] = None, macros: Optional[sqlmesh.utils.UniqueKeyDict[str, Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]]] = None, dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType] = None, variables: Optional[Dict[str, Any]] = None, default_catalog: Optional[str] = None, blueprint_variables: Optional[Dict[str, Any]] = None) -> Optional[List[sqlglot.expressions.core.Expr]]: View Source

def render_expression(
    expression: exp.Expr,
    module_path: Path,
    path: t.Optional[Path],
    jinja_macros: t.Optional[JinjaMacroRegistry] = None,
    macros: t.Optional[MacroRegistry] = None,
    dialect: DialectType = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    default_catalog: t.Optional[str] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Optional[t.List[exp.Expr]]:
    """Renders a single expression with an `ExpressionRenderer`.

    A python environment is built for the expression with `make_python_env` and
    handed to the renderer. Identifier quoting and normalization are both turned
    off for the renderer.

    Args:
        expression: The expression to render.
        module_path: Forwarded to `make_python_env`.
        path: Forwarded to `make_python_env` and to the renderer.
        jinja_macros: The jinja macro registry given to the renderer.
        macros: The macro registry used to build the python environment. When it is
            falsy, `macro.get_registry()` is used instead.
        dialect: The dialect given to the renderer.
        variables: Forwarded to `make_python_env`.
        default_catalog: The default catalog given to the renderer.
        blueprint_variables: Forwarded to `make_python_env`.

    Returns:
        Whatever `ExpressionRenderer.render()` returns.
    """
    macro_registry = macros or macro.get_registry()
    python_env = make_python_env(
        expressions=expression,
        jinja_macro_references=None,
        module_path=module_path,
        macros=macro_registry,
        variables=variables,
        path=path,
        blueprint_variables=blueprint_variables,
    )
    renderer = ExpressionRenderer(
        expression,
        dialect,
        [],
        path=path,
        jinja_macro_registry=jinja_macros,
        python_env=python_env,
        default_catalog=default_catalog,
        quote_identifiers=False,
        normalize_identifiers=False,
    )
    return renderer.render()

META_FIELD_CONVERTER: Dict[str, Callable] = {'start': <function <lambda>>, 'cron': <function <lambda>>, 'cron_tz': <function <lambda>>, 'partitioned_by_': <function _single_expr_or_tuple>, 'clustered_by': <function _single_expr_or_tuple>, 'depends_on_': <function <lambda>>, 'pre': <function _list_of_calls_to_exp>, 'post': <function _list_of_calls_to_exp>, 'audits': <function _list_of_calls_to_exp>, 'columns_to_types_': <function <lambda>>, 'column_descriptions_': <function <lambda>>, 'tags': <function single_value_or_tuple>, 'grains': <function _refs_to_sql>, 'references': <function _refs_to_sql>, 'physical_properties_': <function <lambda>>, 'virtual_properties_': <function <lambda>>, 'session_properties_': <function <lambda>>, 'allow_partials': <function convert>, 'signals': <function <lambda>>, 'formatting': <class 'str'>, 'optimize_query': <class 'str'>, 'virtual_environment_mode': <function <lambda>>, 'dbt_node_info_': <function <lambda>>, 'grants_': <function <lambda>>, 'grants_target_layer': <function <lambda>>}

def get_model_name(path: pathlib.Path) -> str: View Source

def get_model_name(path: Path) -> str:
    """Infers a model name from the location of its definition file.

    The name is made of the directories that follow the first `models` component
    of the path plus the file stem, joined with dots. Only the last three parts
    are kept, so the result has at most three dot-separated segments.

    Args:
        path: The path to the model definition file.

    Returns:
        The inferred model name, e.g. `db.table` for `models/db/table.sql`.

    Raises:
        ValueError: If no component of `path` is named `models`.
    """
    parts = path.parts
    try:
        models_index = parts.index("models")
    except ValueError as e:
        # Replace the opaque "tuple.index(x): x not in tuple" with an actionable message.
        raise ValueError(
            f"Cannot infer a model name from '{path}': the path has no 'models' directory"
        ) from e

    # Skip the file name itself (last part) and use its stem instead.
    name_parts = [*parts[models_index + 1 : -1], path.stem]
    return ".".join(name_parts[-3:])

def clickhouse_partition_func( column: sqlglot.expressions.core.Expr, columns_to_types: Optional[Dict[str, sqlglot.expressions.datatypes.DataType]]) -> sqlglot.expressions.core.Expr: View Source

def clickhouse_partition_func(
    column: exp.Expr, columns_to_types: t.Optional[t.Dict[str, exp.DataType]]
) -> exp.Expr:
    """Wraps a column in ClickHouse's `toMonday()` function.

    The column's type is looked up by name in `columns_to_types` and treated as
    UNKNOWN when it cannot be found. Three cases follow:

    - DATE, DATE32, DATETIME or DATETIME64: the column is passed to `toMonday` as is.
    - UNKNOWN: the column is cast to `DateTime64(9, 'UTC')` before calling `toMonday`.
    - any other type: the column is cast to `DateTime64(9, 'UTC')`, passed to
      `toMonday`, and the result is cast back to the column's original type.

    Args:
        column: The column expression to wrap.
        columns_to_types: An optional mapping of column names to their data types.

    Returns:
        The expression built around `toMonday`.
    """

    def _to_monday_via_datetime64() -> exp.Expr:
        datetime64 = exp.DataType.build("DateTime64(9, 'UTC')", dialect="clickhouse")
        return exp.func("toMonday", exp.cast(column, datetime64), dialect="clickhouse")

    declared_type = columns_to_types and columns_to_types.get(column.name)
    col_type = declared_type or exp.DataType.build("UNKNOWN")

    # `toMonday()` function accepts a Date or DateTime type column,
    # so a column of one of these types can be passed through directly
    if col_type.is_type(
        exp.DataType.Type.DATE,
        exp.DataType.Type.DATE32,
        exp.DataType.Type.DATETIME,
        exp.DataType.Type.DATETIME64,
    ):
        return exp.func("toMonday", column, dialect="clickhouse")

    # if input column type is not known, cast input to DateTime64
    if col_type.is_type(exp.DataType.Type.UNKNOWN):
        return _to_monday_via_datetime64()

    # if input column type is known but not conformable, cast input to DateTime64
    # and cast output back to original type
    return exp.cast(_to_monday_via_datetime64(), col_type)

TIME_COL_PARTITION_FUNC = {'clickhouse': <function clickhouse_partition_func>}