Context
A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and
load your project's models, macros, and audits. Afterwards, you can use the context to create and apply
plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks.
For more information regarding what a context can do, see Context.
Examples:
Creating and applying a plan against the staging environment.
from sqlmesh.core.context import Context
context = Context(paths="example", config="local_config")
plan = context.plan("staging")
context.apply(plan)
Running audits on your data.
from sqlmesh.core.context import Context
context = Context(paths="example", config="local_config")
context.audit("yesterday", "now")
Running tests on your models.
from sqlmesh.core.context import Context
context = Context(paths="example")
context.test()
1""" 2# Context 3 4A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and 5load your project's models, macros, and audits. Afterwards, you can use the context to create and apply 6plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks. 7For more information regarding what a context can do, see `sqlmesh.core.context.Context`. 8 9# Examples: 10 11Creating and applying a plan against the staging environment. 12```python 13from sqlmesh.core.context import Context 14context = Context(paths="example", config="local_config") 15plan = context.plan("staging") 16context.apply(plan) 17``` 18 19Running audits on your data. 20```python 21from sqlmesh.core.context import Context 22context = Context(paths="example", config="local_config") 23context.audit("yesterday", "now") 24``` 25 26Running tests on your models. 27```python 28from sqlmesh.core.context import Context 29context = Context(paths="example") 30context.test() 31``` 32""" 33 34from __future__ import annotations 35 36import abc 37import collections 38import logging 39import sys 40import time 41import traceback 42import typing as t 43from functools import cached_property 44from io import StringIO 45from itertools import chain 46from pathlib import Path 47from shutil import rmtree 48from types import MappingProxyType 49from datetime import datetime 50 51from sqlglot import Dialect, exp 52from sqlglot.helper import first 53from sqlglot.lineage import GraphHTML 54 55from sqlmesh.core import analytics 56from sqlmesh.core import constants as c 57from sqlmesh.core.analytics import python_api_analytics 58from sqlmesh.core.audit import Audit, ModelAudit, StandaloneAudit 59from sqlmesh.core.config import ( 60 CategorizerConfig, 61 Config, 62 load_configs, 63) 64from sqlmesh.core.config.connection import ConnectionConfig 65from sqlmesh.core.config.loader import C 66from sqlmesh.core.config.root import RegexKeyDict 67from sqlmesh.core.console import get_console 68from sqlmesh.core.context_diff import ContextDiff 69from sqlmesh.core.dialect import ( 70 format_model_expressions, 71 is_meta_expression, 72 normalize_model_name, 73 pandas_to_sql, 74 parse, 75 parse_one, 76) 77from sqlmesh.core.engine_adapter import EngineAdapter 78from sqlmesh.core.environment import Environment, EnvironmentNamingInfo, EnvironmentStatements 79from sqlmesh.core.loader import Loader 80from sqlmesh.core.linter.definition import AnnotatedRuleViolation, Linter 81from sqlmesh.core.linter.rules import BUILTIN_RULES 82from sqlmesh.core.macros import ExecutableOrMacro, macro 83from sqlmesh.core.metric import Metric, rewrite 84from sqlmesh.core.model import Model, update_model_schemas 85from sqlmesh.core.config.model import ModelDefaultsConfig 86from sqlmesh.core.notification_target import ( 87 NotificationEvent, 88 NotificationTarget, 89 NotificationTargetManager, 90) 91from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals, PlanExplainer 92from sqlmesh.core.plan.definition import UserProvidedFlags 93from sqlmesh.core.reference import ReferenceGraph 94from sqlmesh.core.scheduler import Scheduler, CompletionStatus 95from sqlmesh.core.schema_loader import create_external_models_file 96from sqlmesh.core.selector import Selector, NativeSelector 97from sqlmesh.core.snapshot import ( 98 DeployabilityIndex, 99 Snapshot, 100 SnapshotEvaluator, 101 SnapshotFingerprint, 102 missing_intervals, 103 to_table_mapping, 104) 105from sqlmesh.core.snapshot.definition import get_next_model_interval_start 106from sqlmesh.core.state_sync import ( 107 CachingStateSync, 108 StateReader, 109 StateSync, 110) 111from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots 112from sqlmesh.core.table_diff import TableDiff 113from sqlmesh.core.test import ( 114 ModelTextTestResult, 115 ModelTestMetadata, 116 generate_test, 117 run_tests, 118 filter_tests_by_patterns, 119) 120from sqlmesh.core.user import User 121from sqlmesh.utils import UniqueKeyDict, Verbosity 122from sqlmesh.utils.concurrency import concurrent_apply_to_values 123from sqlmesh.utils.dag import DAG 124from sqlmesh.utils.date import ( 125 TimeLike, 126 to_timestamp, 127 format_tz_datetime, 128 now_timestamp, 129 now, 130 to_datetime, 131 make_exclusive, 132) 133from sqlmesh.utils.errors import ( 134 CircuitBreakerError, 135 ConfigError, 136 PlanError, 137 SQLMeshError, 138 UncategorizedPlanError, 139 LinterError, 140) 141from sqlmesh.utils.config import print_config 142from sqlmesh.utils.jinja import JinjaMacroRegistry 143from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path 144 145if t.TYPE_CHECKING: 146 import pandas as pd 147 from typing_extensions import Literal 148 149 from sqlmesh.core.engine_adapter._typing import ( 150 BigframeSession, 151 DF, 152 PySparkDataFrame, 153 PySparkSession, 154 SnowparkSession, 155 ) 156 from sqlmesh.core.snapshot import Node 157 158 from sqlmesh.core.snapshot.definition import Intervals 159 160 ModelOrSnapshot = t.Union[str, Model, Snapshot] 161 NodeOrSnapshot = t.Union[str, Model, StandaloneAudit, Snapshot] 162 163logger = logging.getLogger(__name__) 164 165 166class BaseContext(abc.ABC): 167 """The base context which defines methods to execute a model.""" 168 169 @property 170 @abc.abstractmethod 171 def default_dialect(self) -> t.Optional[str]: 172 """Returns the default dialect.""" 173 174 @property 175 @abc.abstractmethod 176 def _model_tables(self) -> t.Dict[str, str]: 177 """Returns a mapping of model names to tables.""" 178 179 @property 180 @abc.abstractmethod 181 def engine_adapter(self) -> EngineAdapter: 182 """Returns an engine adapter.""" 183 184 @property 185 def spark(self) -> t.Optional[PySparkSession]: 186 """Returns the spark session if it exists.""" 187 return self.engine_adapter.spark 188 189 @property 190 def snowpark(self) -> t.Optional[SnowparkSession]: 191 """Returns the snowpark session if it exists.""" 192 return self.engine_adapter.snowpark 193 194 @property 195 def bigframe(self) -> t.Optional[BigframeSession]: 196 """Returns the bigframe session if it exists.""" 197 return self.engine_adapter.bigframe 198 199 @property 200 def default_catalog(self) -> t.Optional[str]: 201 raise NotImplementedError 202 203 def table(self, model_name: str) -> str: 204 get_console().log_warning( 205 "The SQLMesh context's `table` method is deprecated and will be removed " 206 "in a future release. Please use the `resolve_table` method instead." 207 ) 208 return self.resolve_table(model_name) 209 210 def resolve_table(self, model_name: str) -> str: 211 """Gets the physical table name for a given model. 212 213 Args: 214 model_name: The model name. 215 216 Returns: 217 The physical table name. 218 """ 219 model_name = normalize_model_name(model_name, self.default_catalog, self.default_dialect) 220 221 if model_name not in self._model_tables: 222 model_name_list = "\n".join(list(self._model_tables)) 223 logger.debug( 224 f"'{model_name}' not found in model to table mapping. Available model names: \n{model_name_list}" 225 ) 226 raise SQLMeshError( 227 f"Unable to find a table mapping for model '{model_name}'. Has it been spelled correctly?" 228 ) 229 230 # We generate SQL for the default dialect because the table name may be used in a 231 # fetchdf call and so the quotes need to be correct (eg. backticks for bigquery) 232 return parse_one(self._model_tables[model_name]).sql( 233 dialect=self.default_dialect, identify=True 234 ) 235 236 def fetchdf( 237 self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False 238 ) -> pd.DataFrame: 239 """Fetches a dataframe given a sql string or sqlglot expression. 240 241 Args: 242 query: SQL string or sqlglot expression. 243 quote_identifiers: Whether to quote all identifiers in the query. 244 245 Returns: 246 The default dataframe is Pandas, but for Spark a PySpark dataframe is returned. 247 """ 248 return self.engine_adapter.fetchdf(query, quote_identifiers=quote_identifiers) 249 250 def fetch_pyspark_df( 251 self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False 252 ) -> PySparkDataFrame: 253 """Fetches a PySpark dataframe given a sql string or sqlglot expression. 254 255 Args: 256 query: SQL string or sqlglot expression. 257 quote_identifiers: Whether to quote all identifiers in the query. 258 259 Returns: 260 A PySpark dataframe. 261 """ 262 return self.engine_adapter.fetch_pyspark_df(query, quote_identifiers=quote_identifiers) 263 264 265class ExecutionContext(BaseContext): 266 """The minimal context needed to execute a model. 267 268 Args: 269 engine_adapter: The engine adapter to execute queries against. 270 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 271 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 272 """ 273 274 def __init__( 275 self, 276 engine_adapter: EngineAdapter, 277 snapshots: t.Dict[str, Snapshot], 278 deployability_index: t.Optional[DeployabilityIndex] = None, 279 default_dialect: t.Optional[str] = None, 280 default_catalog: t.Optional[str] = None, 281 is_restatement: t.Optional[bool] = None, 282 parent_intervals: t.Optional[Intervals] = None, 283 variables: t.Optional[t.Dict[str, t.Any]] = None, 284 blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, 285 ): 286 self.snapshots = snapshots 287 self.deployability_index = deployability_index 288 self._engine_adapter = engine_adapter 289 self._default_catalog = default_catalog 290 self._default_dialect = default_dialect 291 self._variables = variables or {} 292 self._blueprint_variables = blueprint_variables or {} 293 self._is_restatement = is_restatement 294 self._parent_intervals = parent_intervals 295 296 @property 297 def default_dialect(self) -> t.Optional[str]: 298 return self._default_dialect 299 300 @property 301 def engine_adapter(self) -> EngineAdapter: 302 """Returns an engine adapter.""" 303 return self._engine_adapter 304 305 @cached_property 306 def _model_tables(self) -> t.Dict[str, str]: 307 """Returns a mapping of model names to tables.""" 308 return to_table_mapping(self.snapshots.values(), self.deployability_index) 309 310 @property 311 def default_catalog(self) -> t.Optional[str]: 312 return self._default_catalog 313 314 @property 315 def gateway(self) -> t.Optional[str]: 316 """Returns the gateway name.""" 317 return self.var(c.GATEWAY) 318 319 @property 320 def is_restatement(self) -> t.Optional[bool]: 321 return self._is_restatement 322 323 @property 324 def parent_intervals(self) -> t.Optional[Intervals]: 325 return self._parent_intervals 326 327 def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: 328 """Returns a variable value.""" 329 return self._variables.get(var_name.lower(), default) 330 331 def blueprint_var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: 332 """Returns a blueprint variable value.""" 333 return self._blueprint_variables.get(var_name.lower(), default) 334 335 def with_variables( 336 self, 337 variables: t.Dict[str, t.Any], 338 blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, 339 ) -> ExecutionContext: 340 """Returns a new ExecutionContext with additional variables.""" 341 return ExecutionContext( 342 self._engine_adapter, 343 self.snapshots, 344 self.deployability_index, 345 self._default_dialect, 346 self._default_catalog, 347 self._is_restatement, 348 variables=variables, 349 blueprint_variables=blueprint_variables, 350 ) 351 352 353class GenericContext(BaseContext, t.Generic[C]): 354 """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks. 355 356 Args: 357 notification_targets: The notification target to use. Defaults to what is defined in config. 358 paths: The directories containing SQLMesh files. 359 config: A Config object or the name of a Config object in config.py. 360 connection: The name of the connection. If not specified the first connection as it appears 361 in configuration will be used. 362 test_connection: The name of the connection to use for tests. If not specified the first 363 connection as it appears in configuration will be used. 364 concurrent_tasks: The maximum number of tasks that can use the connection concurrently. 365 load: Whether or not to automatically load all models and macros (default True). 366 console: The rich instance used for printing out CLI command results. 367 users: A list of users to make known to SQLMesh. 368 """ 369 370 CONFIG_TYPE: t.Type[C] 371 """The type of config object to use (default: Config).""" 372 373 PLAN_BUILDER_TYPE = PlanBuilder 374 """The type of plan builder object to use (default: PlanBuilder).""" 375 376 def __init__( 377 self, 378 notification_targets: t.Optional[t.List[NotificationTarget]] = None, 379 state_sync: t.Optional[StateSync] = None, 380 paths: t.Union[str | Path, t.Iterable[str | Path]] = "", 381 config: t.Optional[t.Union[C, str, t.Dict[Path, C]]] = None, 382 gateway: t.Optional[str] = None, 383 concurrent_tasks: t.Optional[int] = None, 384 loader: t.Optional[t.Type[Loader]] = None, 385 load: bool = True, 386 users: t.Optional[t.List[User]] = None, 387 config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None, 388 selector: t.Optional[t.Type[Selector]] = None, 389 ): 390 self.configs = ( 391 config 392 if isinstance(config, dict) 393 else load_configs(config, self.CONFIG_TYPE, paths, **(config_loader_kwargs or {})) 394 ) 395 self._projects = {config.project for config in self.configs.values()} 396 self.dag: DAG[str] = DAG() 397 self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") 398 self._audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("audits") 399 self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( 400 "standaloneaudits" 401 ) 402 self._model_test_metadata: t.List[ModelTestMetadata] = [] 403 self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {} 404 self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {} 405 self._models_with_tests: t.Set[str] = set() 406 407 self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") 408 self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") 409 self._jinja_macros = JinjaMacroRegistry() 410 self._requirements: t.Dict[str, str] = {} 411 self._environment_statements: t.List[EnvironmentStatements] = [] 412 self._excluded_requirements: t.Set[str] = set() 413 self._engine_adapter: t.Optional[EngineAdapter] = None 414 self._linters: t.Dict[str, Linter] = {} 415 self._loaded: bool = False 416 self._selector_cls = selector or NativeSelector 417 418 self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items()))) 419 420 self._all_dialects: t.Set[str] = {self.config.dialect or ""} 421 422 if self.config.disable_anonymized_analytics: 423 analytics.disable_analytics() 424 425 self.gateway = gateway 426 self._scheduler = self.config.get_scheduler(self.gateway) 427 self.environment_ttl = self.config.environment_ttl 428 self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments) 429 self.auto_categorize_changes = self.config.plan.auto_categorize_changes 430 self.selected_gateway = (gateway or self.config.default_gateway_name).lower() 431 432 gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults 433 if gw_model_defaults: 434 # Merge global model defaults with the selected gateway's, if it's overriden 435 global_defaults = self.config.model_defaults.model_dump(exclude_unset=True) 436 gateway_defaults = gw_model_defaults.model_dump(exclude_unset=True) 437 438 self.config.model_defaults = ModelDefaultsConfig( 439 **{**global_defaults, **gateway_defaults} 440 ) 441 442 # This allows overriding the default dialect's normalization strategy, so for example 443 # one can do `dialect="duckdb,normalization_strategy=lowercase"` and this will be 444 # applied to the DuckDB dialect globally 445 if "normalization_strategy" in str(self.config.dialect): 446 dialect = Dialect.get_or_raise(self.config.dialect) 447 type(dialect).NORMALIZATION_STRATEGY = dialect.normalization_strategy 448 449 self._loaders = [ 450 (loader or config.loader)(self, path, **config.loader_kwargs) 451 for path, config in self.configs.items() 452 ] 453 454 self._concurrent_tasks = concurrent_tasks 455 self._state_connection_config = ( 456 self.config.get_state_connection(self.gateway) or self.connection_config 457 ) 458 459 self._snapshot_evaluator: t.Optional[SnapshotEvaluator] = None 460 461 self.console = get_console() 462 setattr(self.console, "dialect", self.config.dialect) 463 464 self._provided_state_sync: t.Optional[StateSync] = state_sync 465 self._state_sync: t.Optional[StateSync] = None 466 467 # Should we dedupe notification_targets? If so how? 468 self.notification_targets = (notification_targets or []) + self.config.notification_targets 469 self.users = (users or []) + self.config.users 470 self.users = list({user.username: user for user in self.users}.values()) 471 self._register_notification_targets() 472 473 if load: 474 self.load() 475 476 @property 477 def default_dialect(self) -> t.Optional[str]: 478 return self.config.dialect 479 480 @property 481 def engine_adapter(self) -> EngineAdapter: 482 """Returns the default engine adapter.""" 483 if self._engine_adapter is None: 484 self._engine_adapter = self.connection_config.create_engine_adapter() 485 return self._engine_adapter 486 487 @property 488 def snapshot_evaluator(self) -> SnapshotEvaluator: 489 if not self._snapshot_evaluator: 490 self._snapshot_evaluator = SnapshotEvaluator( 491 { 492 gateway: adapter.with_settings(execute_log_level=logging.INFO) 493 for gateway, adapter in self.engine_adapters.items() 494 }, 495 ddl_concurrent_tasks=self.concurrent_tasks, 496 selected_gateway=self.selected_gateway, 497 ) 498 return self._snapshot_evaluator 499 500 def execution_context( 501 self, 502 deployability_index: t.Optional[DeployabilityIndex] = None, 503 engine_adapter: t.Optional[EngineAdapter] = None, 504 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 505 ) -> ExecutionContext: 506 """Returns an execution context.""" 507 return ExecutionContext( 508 engine_adapter=engine_adapter or self.engine_adapter, 509 snapshots=snapshots or self.snapshots, 510 deployability_index=deployability_index, 511 default_dialect=self.default_dialect, 512 default_catalog=self.default_catalog, 513 ) 514 515 @python_api_analytics 516 def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model: 517 """Update or insert a model. 518 519 The context's models dictionary will be updated to include these changes. 520 521 Args: 522 model: Model name or instance to update. 523 kwargs: The kwargs to update the model with. 524 525 Returns: 526 A new instance of the updated or inserted model. 527 """ 528 model = self.get_model(model, raise_if_missing=True) 529 if not model.enabled: 530 raise SQLMeshError(f"The disabled model '{model.name}' cannot be upserted") 531 path = model._path 532 533 model = model.copy(update=kwargs) 534 model._path = path 535 536 self.dag.add(model.fqn, model.depends_on) 537 538 self._models.update( 539 { 540 model.fqn: model, 541 # bust the fingerprint cache for all downstream models 542 **{fqn: self._models[fqn].copy() for fqn in self.dag.downstream(model.fqn)}, 543 } 544 ) 545 546 update_model_schemas( 547 self.dag, 548 models=self._models, 549 cache_dir=self.cache_dir, 550 ) 551 552 if model.dialect: 553 self._all_dialects.add(model.dialect) 554 555 model.validate_definition() 556 557 return model 558 559 def scheduler( 560 self, 561 environment: t.Optional[str] = None, 562 snapshot_evaluator: t.Optional[SnapshotEvaluator] = None, 563 ) -> Scheduler: 564 """Returns the built-in scheduler. 565 566 Args: 567 environment: The target environment to source model snapshots from, or None 568 if snapshots should be sourced from the currently loaded local state. 569 570 Returns: 571 The built-in scheduler instance. 572 """ 573 snapshots: t.Iterable[Snapshot] 574 if environment is not None: 575 stored_environment = self.state_sync.get_environment(environment) 576 if stored_environment is None: 577 raise ConfigError(f"Environment '{environment}' was not found.") 578 snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values() 579 else: 580 snapshots = self.snapshots.values() 581 582 if not snapshots: 583 raise ConfigError("No models were found") 584 585 return self.create_scheduler(snapshots, snapshot_evaluator or self.snapshot_evaluator) 586 587 def create_scheduler( 588 self, snapshots: t.Iterable[Snapshot], snapshot_evaluator: SnapshotEvaluator 589 ) -> Scheduler: 590 """Creates the built-in scheduler. 591 592 Args: 593 snapshots: The snapshots to schedule. 594 595 Returns: 596 The built-in scheduler instance. 597 """ 598 return Scheduler( 599 snapshots, 600 snapshot_evaluator, 601 self.state_sync, 602 default_catalog=self.default_catalog, 603 max_workers=self.concurrent_tasks, 604 console=self.console, 605 notification_target_manager=self.notification_target_manager, 606 ) 607 608 @property 609 def state_sync(self) -> StateSync: 610 if not self._state_sync: 611 self._state_sync = self._new_state_sync() 612 613 if self._state_sync.get_versions(validate=False).schema_version == 0: 614 self.console.log_status_update("Initializing new project state...") 615 self._state_sync.migrate() 616 self._state_sync.get_versions() 617 self._state_sync = CachingStateSync(self._state_sync) # type: ignore 618 return self._state_sync 619 620 @property 621 def state_reader(self) -> StateReader: 622 return self.state_sync 623 624 def refresh(self) -> None: 625 """Refresh all models that have been updated.""" 626 if any(loader.reload_needed() for loader in self._loaders): 627 self.load() 628 629 def load(self, update_schemas: bool = True) -> GenericContext[C]: 630 """Load all files in the context's path.""" 631 load_start_ts = time.perf_counter() 632 633 loaded_projects = [loader.load() for loader in self._loaders] 634 635 self.dag = DAG() 636 self._standalone_audits.clear() 637 self._audits.clear() 638 self._macros.clear() 639 self._models.clear() 640 self._metrics.clear() 641 self._requirements.clear() 642 self._excluded_requirements.clear() 643 self._linters.clear() 644 self._environment_statements = [] 645 self._model_test_metadata.clear() 646 self._model_test_metadata_path_index.clear() 647 self._model_test_metadata_fully_qualified_name_index.clear() 648 self._models_with_tests.clear() 649 650 for loader, project in zip(self._loaders, loaded_projects): 651 self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) 652 self._macros.update(project.macros) 653 self._models.update(project.models) 654 self._metrics.update(project.metrics) 655 self._audits.update(project.audits) 656 self._standalone_audits.update(project.standalone_audits) 657 self._requirements.update(project.requirements) 658 self._excluded_requirements.update(project.excluded_requirements) 659 self._environment_statements.extend(project.environment_statements) 660 661 self._model_test_metadata.extend(project.model_test_metadata) 662 for metadata in project.model_test_metadata: 663 if metadata.path not in self._model_test_metadata_path_index: 664 self._model_test_metadata_path_index[metadata.path] = [] 665 self._model_test_metadata_path_index[metadata.path].append(metadata) 666 self._model_test_metadata_fully_qualified_name_index[ 667 metadata.fully_qualified_test_name 668 ] = metadata 669 self._models_with_tests.add(metadata.model_name) 670 671 config = loader.config 672 self._linters[config.project] = Linter.from_rules( 673 BUILTIN_RULES.union(project.user_rules), config.linter 674 ) 675 676 # Load environment statements from state for projects not in current load 677 if any(self._projects): 678 prod = self.state_reader.get_environment(c.PROD) 679 if prod: 680 existing_statements = self.state_reader.get_environment_statements(c.PROD) 681 for stmt in existing_statements: 682 if stmt.project and stmt.project not in self._projects: 683 self._environment_statements.append(stmt) 684 685 uncached = set() 686 687 if any(self._projects): 688 prod = self.state_reader.get_environment(c.PROD) 689 690 if prod: 691 for snapshot in self.state_reader.get_snapshots(prod.snapshots).values(): 692 if snapshot.node.project in self._projects: 693 uncached.add(snapshot.name) 694 else: 695 local_store = self._standalone_audits if snapshot.is_audit else self._models 696 if snapshot.name in local_store: 697 uncached.add(snapshot.name) 698 else: 699 local_store[snapshot.name] = snapshot.node # type: ignore 700 701 for model in self._models.values(): 702 self.dag.add(model.fqn, model.depends_on) 703 704 if update_schemas: 705 for fqn in self.dag: 706 model = self._models.get(fqn) # type: ignore 707 708 if not model or fqn in uncached: 709 continue 710 711 # make a copy of remote models that depend on local models or in the downstream chain 712 # without this, a SELECT * FROM local will not propogate properly because the downstream 713 # model will get mutated (schema changes) but the object is the same as the remote cache 714 if any(dep in uncached for dep in model.depends_on): 715 uncached.add(fqn) 716 self._models.update({fqn: model.copy(update={"mapping_schema": {}})}) 717 continue 718 719 update_model_schemas( 720 self.dag, 721 models=self._models, 722 cache_dir=self.cache_dir, 723 ) 724 725 models = self.models.values() 726 for model in models: 727 # The model definition can be validated correctly only after the schema is set. 728 model.validate_definition() 729 730 duplicates = set(self._models) & set(self._standalone_audits) 731 if duplicates: 732 raise ConfigError( 733 f"Models and Standalone audits cannot have the same name: {duplicates}" 734 ) 735 736 self._all_dialects = {m.dialect for m in self._models.values() if m.dialect} | { 737 self.default_dialect or "" 738 } 739 740 analytics.collector.on_project_loaded( 741 project_type=self._project_type, 742 models_count=len(self._models), 743 audits_count=len(self._audits), 744 standalone_audits_count=len(self._standalone_audits), 745 macros_count=len(self._macros), 746 jinja_macros_count=len(self._jinja_macros.root_macros), 747 load_time_sec=time.perf_counter() - load_start_ts, 748 state_sync_fingerprint=self._scheduler.state_sync_fingerprint(self), 749 project_name=self.config.project, 750 ) 751 752 self._loaded = True 753 return self 754 755 @python_api_analytics 756 def run( 757 self, 758 environment: t.Optional[str] = None, 759 *, 760 start: t.Optional[TimeLike] = None, 761 end: t.Optional[TimeLike] = None, 762 execution_time: t.Optional[TimeLike] = None, 763 skip_janitor: bool = False, 764 ignore_cron: bool = False, 765 select_models: t.Optional[t.Collection[str]] = None, 766 exit_on_env_update: t.Optional[int] = None, 767 no_auto_upstream: bool = False, 768 ) -> CompletionStatus: 769 """Run the entire dag through the scheduler. 770 771 Args: 772 environment: The target environment to source model snapshots from and virtually update. Default: prod. 773 start: The start of the interval to render. 774 end: The end of the interval to render. 775 execution_time: The date/time time reference to use for execution time. Defaults to now. 776 skip_janitor: Whether to skip the janitor task. 777 ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals. 778 select_models: A list of model selection expressions to filter models that should run. Note that 779 upstream dependencies of selected models will also be evaluated. 780 exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update 781 to the target environment. 782 no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`. 783 784 Returns: 785 True if the run was successful, False otherwise. 786 """ 787 environment = environment or self.config.default_target_environment 788 environment = Environment.sanitize_name(environment) 789 if not skip_janitor and environment.lower() == c.PROD: 790 self._run_janitor() 791 792 self.notification_target_manager.notify( 793 NotificationEvent.RUN_START, environment=environment 794 ) 795 analytics_run_id = analytics.collector.on_run_start( 796 engine_type=self.snapshot_evaluator.adapter.dialect, 797 state_sync_type=self.state_sync.state_type(), 798 ) 799 self._load_materializations() 800 801 env_check_attempts_num = max( 802 1, 803 self.config.run.environment_check_max_wait 804 // self.config.run.environment_check_interval, 805 ) 806 807 def _block_until_finalized() -> str: 808 for _ in range(env_check_attempts_num): 809 assert environment is not None # mypy 810 environment_state = self.state_sync.get_environment(environment) 811 if not environment_state: 812 raise SQLMeshError(f"Environment '{environment}' was not found.") 813 if environment_state.finalized_ts: 814 return environment_state.plan_id 815 self.console.log_warning( 816 f"Environment '{environment}' is being updated by plan '{environment_state.plan_id}'. " 817 f"Retrying in {self.config.run.environment_check_interval} seconds..." 818 ) 819 time.sleep(self.config.run.environment_check_interval) 820 raise SQLMeshError( 821 f"Exceeded the maximum wait time for environment '{environment}' to be ready. " 822 "This means that the environment either failed to update or the update is taking longer than expected. " 823 "See https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#run to adjust the timeout settings." 824 ) 825 826 success = False 827 interrupted = False 828 done = False 829 while not done: 830 plan_id_at_start = _block_until_finalized() 831 832 def _has_environment_changed() -> bool: 833 assert environment is not None # mypy 834 current_environment_state = self.state_sync.get_environment(environment) 835 return ( 836 not current_environment_state 837 or current_environment_state.plan_id != plan_id_at_start 838 or not current_environment_state.finalized_ts 839 ) 840 841 try: 842 completion_status = self._run( 843 environment, 844 start=start, 845 end=end, 846 execution_time=execution_time, 847 ignore_cron=ignore_cron, 848 select_models=select_models, 849 circuit_breaker=_has_environment_changed, 850 no_auto_upstream=no_auto_upstream, 851 ) 852 done = True 853 except CircuitBreakerError: 854 self.console.log_warning( 855 f"Environment '{environment}' modified while running. Restarting the run..." 856 ) 857 if exit_on_env_update: 858 interrupted = True 859 done = True 860 except Exception as e: 861 self.notification_target_manager.notify( 862 NotificationEvent.RUN_FAILURE, traceback.format_exc() 863 ) 864 logger.info("Run failed.", exc_info=e) 865 analytics.collector.on_run_end( 866 run_id=analytics_run_id, succeeded=False, interrupted=False, error=e 867 ) 868 raise e 869 870 if completion_status.is_success or interrupted: 871 self.notification_target_manager.notify( 872 NotificationEvent.RUN_END, environment=environment 873 ) 874 self.console.log_success(f"Run finished for environment '{environment}'") 875 elif completion_status.is_failure: 876 self.notification_target_manager.notify( 877 NotificationEvent.RUN_FAILURE, "See console logs for details." 878 ) 879 880 analytics.collector.on_run_end( 881 run_id=analytics_run_id, succeeded=success, interrupted=interrupted 882 ) 883 884 if interrupted and exit_on_env_update is not None: 885 sys.exit(exit_on_env_update) 886 887 return completion_status 888 889 @python_api_analytics 890 def run_janitor(self, ignore_ttl: bool) -> bool: 891 success = False 892 893 if self.console.start_cleanup(ignore_ttl): 894 try: 895 self._run_janitor(ignore_ttl) 896 success = True 897 finally: 898 self.console.stop_cleanup(success=success) 899 900 return success 901 902 @python_api_analytics 903 def destroy(self) -> bool: 904 success = False 905 906 # Collect resources to be deleted 907 environments = self.state_reader.get_environments() 908 schemas_to_delete = set() 909 tables_to_delete = set() 910 views_to_delete = set() 911 all_snapshot_infos = set() 912 913 # For each environment find schemas and tables 914 for environment in environments: 915 all_snapshot_infos.update(environment.snapshots) 916 snapshots = self.state_reader.get_snapshots(environment.snapshots).values() 917 for snapshot in snapshots: 918 if snapshot.is_model and not snapshot.is_symbolic: 919 # Get the appropriate adapter 920 if environment.gateway_managed and snapshot.model_gateway: 921 adapter = self.engine_adapters.get( 922 snapshot.model_gateway, self.engine_adapter 923 ) 924 else: 925 adapter = self.engine_adapter 926 927 if environment.suffix_target.is_schema or environment.suffix_target.is_catalog: 928 schema = snapshot.qualified_view_name.schema_for_environment( 929 environment.naming_info, dialect=adapter.dialect 930 ) 931 catalog = snapshot.qualified_view_name.catalog_for_environment( 932 environment.naming_info, dialect=adapter.dialect 933 ) 934 if catalog: 935 schemas_to_delete.add(f"{catalog}.{schema}") 936 else: 937 schemas_to_delete.add(schema) 938 939 if environment.suffix_target.is_table: 940 view_name = snapshot.qualified_view_name.for_environment( 941 environment.naming_info, dialect=adapter.dialect 942 ) 943 views_to_delete.add(view_name) 944 945 # Add snapshot tables 946 table_name = snapshot.table_name() 947 tables_to_delete.add(table_name) 948 949 if self.console.start_destroy(schemas_to_delete, views_to_delete, tables_to_delete): 950 try: 951 success = self._destroy() 952 finally: 953 self.console.stop_destroy(success=success) 954 955 return success 956 957 @t.overload 958 def get_model( 959 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: Literal[True] = True 960 ) -> Model: ... 961 962 @t.overload 963 def get_model( 964 self, 965 model_or_snapshot: ModelOrSnapshot, 966 raise_if_missing: Literal[False] = False, 967 ) -> t.Optional[Model]: ... 968 969 def get_model( 970 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: bool = False 971 ) -> t.Optional[Model]: 972 """Returns a model with the given name or None if a model with such name doesn't exist. 973 974 Args: 975 model_or_snapshot: A model name, model, or snapshot. 976 raise_if_missing: Raises an error if a model is not found. 977 978 Returns: 979 The expected model. 980 """ 981 if isinstance(model_or_snapshot, Snapshot): 982 return model_or_snapshot.model 983 if not isinstance(model_or_snapshot, str): 984 return model_or_snapshot 985 986 try: 987 # We should try all dialects referenced in the project for cases when models use mixed dialects. 988 for dialect in self._all_dialects: 989 normalized_name = normalize_model_name( 990 model_or_snapshot, 991 dialect=dialect, 992 default_catalog=self.default_catalog, 993 ) 994 if normalized_name in self._models: 995 return self._models[normalized_name] 996 except: 997 pass 998 999 if raise_if_missing: 1000 if model_or_snapshot.endswith((".sql", ".py")): 1001 msg = "Resolving models by path is not supported, please pass in the model name instead." 1002 else: 1003 msg = f"Cannot find model with name '{model_or_snapshot}'" 1004 1005 raise SQLMeshError(msg) 1006 1007 return None 1008 1009 @t.overload 1010 def get_snapshot(self, node_or_snapshot: NodeOrSnapshot) -> t.Optional[Snapshot]: ... 1011 1012 @t.overload 1013 def get_snapshot( 1014 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: Literal[True] 1015 ) -> Snapshot: ... 1016 1017 @t.overload 1018 def get_snapshot( 1019 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: Literal[False] 1020 ) -> t.Optional[Snapshot]: ... 1021 1022 def get_snapshot( 1023 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: bool = False 1024 ) -> t.Optional[Snapshot]: 1025 """Returns a snapshot with the given name or None if a snapshot with such name doesn't exist. 1026 1027 Args: 1028 node_or_snapshot: A node name, node, or snapshot. 1029 raise_if_missing: Raises an error if a snapshot is not found. 1030 1031 Returns: 1032 The expected snapshot. 1033 """ 1034 if isinstance(node_or_snapshot, Snapshot): 1035 return node_or_snapshot 1036 fqn = self._node_or_snapshot_to_fqn(node_or_snapshot) 1037 snapshot = self.snapshots.get(fqn) 1038 1039 if raise_if_missing and not snapshot: 1040 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1041 1042 return snapshot 1043 1044 def config_for_path(self, path: Path) -> t.Tuple[Config, Path]: 1045 """Returns the config and path of the said project for a given file path.""" 1046 for config_path, config in self.configs.items(): 1047 try: 1048 path.relative_to(config_path) 1049 return config, config_path 1050 except ValueError: 1051 pass 1052 return self.config, self.path 1053 1054 def config_for_node(self, node: Model | Audit) -> Config: 1055 path = node._path 1056 if path is None: 1057 return self.config 1058 return self.config_for_path(path)[0] # type: ignore 1059 1060 @property 1061 def models(self) -> MappingProxyType[str, Model]: 1062 """Returns all registered models in this context.""" 1063 return MappingProxyType(self._models) 1064 1065 @property 1066 def metrics(self) -> MappingProxyType[str, Metric]: 1067 """Returns all registered metrics in this context.""" 1068 return MappingProxyType(self._metrics) 1069 1070 @property 1071 def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]: 1072 """Returns all registered standalone audits in this context.""" 1073 return MappingProxyType(self._standalone_audits) 1074 1075 @property 1076 def models_with_tests(self) -> t.Set[str]: 1077 """Returns all models with tests in this context.""" 1078 return self._models_with_tests 1079 1080 @property 1081 def snapshots(self) -> t.Dict[str, Snapshot]: 1082 """Generates and returns snapshots based on models registered in this context. 1083 1084 If one of the snapshots has been previously stored in the persisted state, the stored 1085 instance will be returned. 1086 """ 1087 return self._snapshots() 1088 1089 @property 1090 def requirements(self) -> t.Dict[str, str]: 1091 """Returns the Python dependencies of the project loaded in this context.""" 1092 return self._requirements.copy() 1093 1094 @cached_property 1095 def default_catalog(self) -> t.Optional[str]: 1096 return self.default_catalog_per_gateway.get(self.selected_gateway) 1097 1098 @python_api_analytics 1099 def render( 1100 self, 1101 model_or_snapshot: ModelOrSnapshot, 1102 *, 1103 start: t.Optional[TimeLike] = None, 1104 end: t.Optional[TimeLike] = None, 1105 execution_time: t.Optional[TimeLike] = None, 1106 expand: t.Union[bool, t.Iterable[str]] = False, 1107 **kwargs: t.Any, 1108 ) -> exp.Expr: 1109 """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. 1110 1111 Args: 1112 model_or_snapshot: The model, model name, or snapshot to render. 1113 start: The start of the interval to render. 1114 end: The end of the interval to render. 1115 execution_time: The date/time time reference to use for execution time. Defaults to now. 1116 expand: Whether or not to use expand materialized models, defaults to False. 1117 If True, all referenced models are expanded as raw queries. 1118 If a list, only referenced models are expanded as raw queries. 1119 1120 Returns: 1121 The rendered expression. 1122 """ 1123 execution_time = execution_time or now() 1124 1125 model = self.get_model(model_or_snapshot, raise_if_missing=True) 1126 1127 if expand and not isinstance(expand, bool): 1128 expand = { 1129 normalize_model_name( 1130 x, default_catalog=self.default_catalog, dialect=self.default_dialect 1131 ) 1132 for x in expand 1133 } 1134 1135 expand = self.dag.upstream(model.fqn) if expand is True else expand or [] 1136 1137 if model.is_seed: 1138 import pandas as pd 1139 1140 df = next( 1141 model.render( 1142 context=self.execution_context( 1143 engine_adapter=self._get_engine_adapter(model.gateway) 1144 ), 1145 start=start, 1146 end=end, 1147 execution_time=execution_time, 1148 **kwargs, 1149 ) 1150 ) 1151 return next(pandas_to_sql(t.cast(pd.DataFrame, df), model.columns_to_types)) 1152 1153 snapshots = self.snapshots 1154 deployability_index = DeployabilityIndex.create(snapshots.values(), start=start) 1155 1156 return model.render_query_or_raise( 1157 start=start, 1158 end=end, 1159 execution_time=execution_time, 1160 snapshots=snapshots, 1161 expand=expand, 1162 deployability_index=deployability_index, 1163 engine_adapter=self._get_engine_adapter(model.gateway), 1164 **kwargs, 1165 ) 1166 1167 @python_api_analytics 1168 def evaluate( 1169 self, 1170 model_or_snapshot: ModelOrSnapshot, 1171 start: TimeLike, 1172 end: TimeLike, 1173 execution_time: TimeLike, 1174 limit: t.Optional[int] = None, 1175 **kwargs: t.Any, 1176 ) -> DF: 1177 """Evaluate a model or snapshot (running its query against a DB/Engine). 1178 1179 This method is used to test or iterate on models without side effects. 1180 1181 Args: 1182 model_or_snapshot: The model, model name, or snapshot to render. 1183 start: The start of the interval to evaluate. 1184 end: The end of the interval to evaluate. 1185 execution_time: The date/time time reference to use for execution time. 1186 limit: A limit applied to the model. 1187 """ 1188 snapshots = self.snapshots 1189 fqn = self._node_or_snapshot_to_fqn(model_or_snapshot) 1190 if fqn not in snapshots: 1191 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1192 snapshot = snapshots[fqn] 1193 1194 # Expand all uncategorized parents since physical tables don't exist for them yet 1195 expand = [ 1196 parent 1197 for parent in self.dag.upstream(snapshot.model.fqn) 1198 if (parent_snapshot := snapshots.get(parent)) 1199 and parent_snapshot.is_model 1200 and parent_snapshot.model.is_sql 1201 and not parent_snapshot.categorized 1202 ] 1203 1204 df = self.snapshot_evaluator.evaluate_and_fetch( 1205 snapshot, 1206 start=start, 1207 end=end, 1208 execution_time=execution_time, 1209 snapshots=self.snapshots, 1210 limit=limit or c.DEFAULT_MAX_LIMIT, 1211 expand=expand, 1212 ) 1213 1214 if df is None: 1215 raise RuntimeError(f"Error evaluating {snapshot.name}") 1216 1217 return df 1218 1219 @python_api_analytics 1220 def format( 1221 self, 1222 transpile: t.Optional[str] = None, 1223 rewrite_casts: t.Optional[bool] = None, 1224 append_newline: t.Optional[bool] = None, 1225 *, 1226 check: t.Optional[bool] = None, 1227 paths: t.Optional[t.Tuple[t.Union[str, Path], ...]] = None, 1228 **kwargs: t.Any, 1229 ) -> bool: 1230 """Format all SQL models and audits.""" 1231 filtered_targets = [ 1232 target 1233 for target in chain(self._models.values(), self._audits.values()) 1234 if target._path is not None 1235 and target._path.suffix == ".sql" 1236 and (not paths or any(target._path.samefile(p) for p in paths)) 1237 ] 1238 unformatted_file_paths = [] 1239 1240 for target in filtered_targets: 1241 if ( 1242 target._path is None or target.formatting is False 1243 ): # introduced to satisfy type checker as still want to pull filter out as many targets as possible before loop 1244 continue 1245 1246 with open(target._path, "r+", encoding="utf-8") as file: 1247 before = file.read() 1248 1249 after = self._format( 1250 target, 1251 before, 1252 transpile=transpile, 1253 rewrite_casts=rewrite_casts, 1254 append_newline=append_newline, 1255 **kwargs, 1256 ) 1257 1258 if not check: 1259 file.seek(0) 1260 file.write(after) 1261 file.truncate() 1262 elif before != after: 1263 unformatted_file_paths.append(target._path) 1264 1265 if unformatted_file_paths: 1266 for path in unformatted_file_paths: 1267 self.console.log_status_update(f"{path} needs reformatting.") 1268 self.console.log_status_update( 1269 f"\n{len(unformatted_file_paths)} file(s) need reformatting." 1270 ) 1271 return False 1272 1273 return True 1274 1275 def _format( 1276 self, 1277 target: Model | Audit, 1278 before: str, 1279 *, 1280 transpile: t.Optional[str] = None, 1281 rewrite_casts: t.Optional[bool] = None, 1282 append_newline: t.Optional[bool] = None, 1283 **kwargs: t.Any, 1284 ) -> str: 1285 expressions = parse(before, default_dialect=self.config_for_node(target).dialect) 1286 if transpile and is_meta_expression(expressions[0]): 1287 for prop in expressions[0].expressions: 1288 if prop.name.lower() == "dialect": 1289 prop.replace( 1290 exp.Property( 1291 this="dialect", 1292 value=exp.Literal.string(transpile or target.dialect), 1293 ) 1294 ) 1295 1296 format_config = self.config_for_node(target).format 1297 after = format_model_expressions( 1298 expressions, 1299 transpile or target.dialect, 1300 rewrite_casts=( 1301 rewrite_casts if rewrite_casts is not None else not format_config.no_rewrite_casts 1302 ), 1303 **{**format_config.generator_options, **kwargs}, 1304 ) 1305 1306 if append_newline is None: 1307 append_newline = format_config.append_newline 1308 if append_newline: 1309 after += "\n" 1310 1311 return after 1312 1313 @python_api_analytics 1314 def plan( 1315 self, 1316 environment: t.Optional[str] = None, 1317 *, 1318 start: t.Optional[TimeLike] = None, 1319 end: t.Optional[TimeLike] = None, 1320 execution_time: t.Optional[TimeLike] = None, 1321 create_from: t.Optional[str] = None, 1322 skip_tests: t.Optional[bool] = None, 1323 restate_models: t.Optional[t.Iterable[str]] = None, 1324 no_gaps: t.Optional[bool] = None, 1325 skip_backfill: t.Optional[bool] = None, 1326 empty_backfill: t.Optional[bool] = None, 1327 forward_only: t.Optional[bool] = None, 1328 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1329 allow_additive_models: t.Optional[t.Collection[str]] = None, 1330 no_prompts: t.Optional[bool] = None, 1331 auto_apply: t.Optional[bool] = None, 1332 no_auto_categorization: t.Optional[bool] = None, 1333 effective_from: t.Optional[TimeLike] = None, 1334 include_unmodified: t.Optional[bool] = None, 1335 select_models: t.Optional[t.Collection[str]] = None, 1336 backfill_models: t.Optional[t.Collection[str]] = None, 1337 categorizer_config: t.Optional[CategorizerConfig] = None, 1338 enable_preview: t.Optional[bool] = None, 1339 no_diff: t.Optional[bool] = None, 1340 run: t.Optional[bool] = None, 1341 diff_rendered: t.Optional[bool] = None, 1342 skip_linter: t.Optional[bool] = None, 1343 explain: t.Optional[bool] = None, 1344 ignore_cron: t.Optional[bool] = None, 1345 min_intervals: t.Optional[int] = None, 1346 ) -> Plan: 1347 """Interactively creates a plan. 1348 1349 This method compares the current context with the target environment. It then presents 1350 the differences and asks whether to backfill each modified model. 1351 1352 Args: 1353 environment: The environment to diff and plan against. 1354 start: The start date of the backfill if there is one. 1355 end: The end date of the backfill if there is one. 1356 execution_time: The date/time reference to use for execution time. Defaults to now. 1357 create_from: The environment to create the target environment from if it 1358 doesn't exist. If not specified, the "prod" environment will be used. 1359 skip_tests: Unit tests are run by default so this will skip them if enabled 1360 restate_models: A list of either internal or external models, or tags, that need to be restated 1361 for the given plan interval. If the target environment is a production environment, 1362 ALL snapshots that depended on these upstream tables will have their intervals deleted 1363 (even ones not in this current environment). Only the snapshots in this environment will 1364 be backfilled whereas others need to be recovered on a future plan application. For development 1365 environments only snapshots that are part of this plan will be affected. 1366 no_gaps: Whether to ensure that new snapshots for models that are already a 1367 part of the target environment have no data gaps when compared against previous 1368 snapshots for same models. 1369 skip_backfill: Whether to skip the backfill step. Default: False. 1370 empty_backfill: Like skip_backfill, but also records processed intervals. 1371 forward_only: Whether the purpose of the plan is to make forward only changes. 1372 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1373 allow_additive_models: Models whose forward-only changes are allowed to be additive. 1374 no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that 1375 if this flag is set to true and there are uncategorized changes the plan creation will 1376 fail. Default: False. 1377 auto_apply: Whether to automatically apply the new plan after creation. Default: False. 1378 no_auto_categorization: Indicates whether to disable automatic categorization of model 1379 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1380 option determines the behavior. 1381 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1382 project config by default. 1383 effective_from: The effective date from which to apply forward-only changes on production. 1384 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1385 select_models: A list of model selection strings to filter the models that should be included into this plan. 1386 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1387 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1388 no_diff: Hide text differences for changed models. 1389 run: Whether to run latest intervals as part of the plan application. 1390 diff_rendered: Whether the diff should compare raw vs rendered models 1391 skip_linter: Linter runs by default so this will skip it if enabled 1392 explain: Whether to explain the plan instead of applying it. 1393 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1394 on every model when checking for missing intervals 1395 1396 Returns: 1397 The populated Plan object. 1398 """ 1399 plan_builder = self.plan_builder( 1400 environment, 1401 start=start, 1402 end=end, 1403 execution_time=execution_time, 1404 create_from=create_from, 1405 skip_tests=skip_tests, 1406 restate_models=restate_models, 1407 no_gaps=no_gaps, 1408 skip_backfill=skip_backfill, 1409 empty_backfill=empty_backfill, 1410 forward_only=forward_only, 1411 allow_destructive_models=allow_destructive_models, 1412 allow_additive_models=allow_additive_models, 1413 no_auto_categorization=no_auto_categorization, 1414 effective_from=effective_from, 1415 include_unmodified=include_unmodified, 1416 select_models=select_models, 1417 backfill_models=backfill_models, 1418 categorizer_config=categorizer_config, 1419 enable_preview=enable_preview, 1420 run=run, 1421 diff_rendered=diff_rendered, 1422 skip_linter=skip_linter, 1423 explain=explain, 1424 ignore_cron=ignore_cron, 1425 min_intervals=min_intervals, 1426 ) 1427 1428 plan = plan_builder.build() 1429 1430 if no_auto_categorization or plan.uncategorized: 1431 # Prompts are required if the auto categorization is disabled 1432 # or if there are any uncategorized snapshots in the plan 1433 no_prompts = False 1434 1435 if explain: 1436 auto_apply = True 1437 1438 self.console.plan( 1439 plan_builder, 1440 auto_apply if auto_apply is not None else self.config.plan.auto_apply, 1441 self.default_catalog, 1442 no_diff=no_diff if no_diff is not None else self.config.plan.no_diff, 1443 no_prompts=no_prompts if no_prompts is not None else self.config.plan.no_prompts, 1444 ) 1445 1446 return plan 1447 1448 @python_api_analytics 1449 def plan_builder( 1450 self, 1451 environment: t.Optional[str] = None, 1452 *, 1453 start: t.Optional[TimeLike] = None, 1454 end: t.Optional[TimeLike] = None, 1455 execution_time: t.Optional[TimeLike] = None, 1456 create_from: t.Optional[str] = None, 1457 skip_tests: t.Optional[bool] = None, 1458 restate_models: t.Optional[t.Iterable[str]] = None, 1459 no_gaps: t.Optional[bool] = None, 1460 skip_backfill: t.Optional[bool] = None, 1461 empty_backfill: t.Optional[bool] = None, 1462 forward_only: t.Optional[bool] = None, 1463 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1464 allow_additive_models: t.Optional[t.Collection[str]] = None, 1465 no_auto_categorization: t.Optional[bool] = None, 1466 effective_from: t.Optional[TimeLike] = None, 1467 include_unmodified: t.Optional[bool] = None, 1468 select_models: t.Optional[t.Collection[str]] = None, 1469 backfill_models: t.Optional[t.Collection[str]] = None, 1470 categorizer_config: t.Optional[CategorizerConfig] = None, 1471 enable_preview: t.Optional[bool] = None, 1472 run: t.Optional[bool] = None, 1473 diff_rendered: t.Optional[bool] = None, 1474 skip_linter: t.Optional[bool] = None, 1475 explain: t.Optional[bool] = None, 1476 ignore_cron: t.Optional[bool] = None, 1477 min_intervals: t.Optional[int] = None, 1478 always_include_local_changes: t.Optional[bool] = None, 1479 ) -> PlanBuilder: 1480 """Creates a plan builder. 1481 1482 Args: 1483 environment: The environment to diff and plan against. 1484 start: The start date of the backfill if there is one. 1485 end: The end date of the backfill if there is one. 1486 execution_time: The date/time reference to use for execution time. Defaults to now. 1487 create_from: The environment to create the target environment from if it 1488 doesn't exist. If not specified, the "prod" environment will be used. 1489 skip_tests: Unit tests are run by default so this will skip them if enabled 1490 restate_models: A list of either internal or external models, or tags, that need to be restated 1491 for the given plan interval. If the target environment is a production environment, 1492 ALL snapshots that depended on these upstream tables will have their intervals deleted 1493 (even ones not in this current environment). Only the snapshots in this environment will 1494 be backfilled whereas others need to be recovered on a future plan application. For development 1495 environments only snapshots that are part of this plan will be affected. 1496 no_gaps: Whether to ensure that new snapshots for models that are already a 1497 part of the target environment have no data gaps when compared against previous 1498 snapshots for same models. 1499 skip_backfill: Whether to skip the backfill step. Default: False. 1500 empty_backfill: Like skip_backfill, but also records processed intervals. 1501 forward_only: Whether the purpose of the plan is to make forward only changes. 1502 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1503 no_auto_categorization: Indicates whether to disable automatic categorization of model 1504 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1505 option determines the behavior. 1506 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1507 project config by default. 1508 effective_from: The effective date from which to apply forward-only changes on production. 1509 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1510 select_models: A list of model selection strings to filter the models that should be included into this plan. 1511 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1512 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1513 run: Whether to run latest intervals as part of the plan application. 1514 diff_rendered: Whether the diff should compare raw vs rendered models 1515 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1516 on every model when checking for missing intervals 1517 always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. 1518 However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour. 1519 1520 Returns: 1521 The plan builder. 1522 """ 1523 kwargs: t.Dict[str, t.Optional[UserProvidedFlags]] = { 1524 "start": start, 1525 "end": end, 1526 "execution_time": execution_time, 1527 "create_from": create_from, 1528 "skip_tests": skip_tests, 1529 "restate_models": list(restate_models) if restate_models is not None else None, 1530 "no_gaps": no_gaps, 1531 "skip_backfill": skip_backfill, 1532 "empty_backfill": empty_backfill, 1533 "forward_only": forward_only, 1534 "allow_destructive_models": list(allow_destructive_models) 1535 if allow_destructive_models is not None 1536 else None, 1537 "allow_additive_models": list(allow_additive_models) 1538 if allow_additive_models is not None 1539 else None, 1540 "no_auto_categorization": no_auto_categorization, 1541 "effective_from": effective_from, 1542 "include_unmodified": include_unmodified, 1543 "select_models": list(select_models) if select_models is not None else None, 1544 "backfill_models": list(backfill_models) if backfill_models is not None else None, 1545 "enable_preview": enable_preview, 1546 "run": run, 1547 "diff_rendered": diff_rendered, 1548 "skip_linter": skip_linter, 1549 "min_intervals": min_intervals, 1550 } 1551 user_provided_flags: t.Dict[str, UserProvidedFlags] = { 1552 k: v for k, v in kwargs.items() if v is not None 1553 } 1554 1555 skip_tests = explain or skip_tests or False 1556 no_gaps = no_gaps or False 1557 skip_backfill = skip_backfill or False 1558 empty_backfill = empty_backfill or False 1559 run = run or False 1560 diff_rendered = diff_rendered or False 1561 skip_linter = skip_linter or False 1562 min_intervals = min_intervals or 0 1563 1564 environment = environment or self.config.default_target_environment 1565 environment = Environment.sanitize_name(environment) 1566 is_dev = environment != c.PROD 1567 1568 if include_unmodified is None: 1569 include_unmodified = self.config.plan.include_unmodified 1570 1571 if skip_backfill and not no_gaps and not is_dev: 1572 # note: we deliberately don't mention the --no-gaps flag in case the plan came from the sqlmesh_dbt command 1573 # todo: perhaps we could have better error messages if we check sys.argv[0] for which cli is running? 1574 self.console.log_warning( 1575 "Skipping the backfill stage for production can lead to unexpected results, such as tables being empty or incremental data with non-contiguous time ranges being made available.\n" 1576 "If you are doing this deliberately to create an empty version of a table to test a change, please consider using Virtual Data Environments instead." 1577 ) 1578 1579 if not skip_linter: 1580 self.lint_models() 1581 1582 self._run_plan_tests(skip_tests=skip_tests) 1583 1584 environment_ttl = ( 1585 self.environment_ttl if environment not in self.pinned_environments else None 1586 ) 1587 1588 model_selector = self._new_selector() 1589 1590 if allow_destructive_models: 1591 expanded_destructive_models = model_selector.expand_model_selections( 1592 allow_destructive_models 1593 ) 1594 else: 1595 expanded_destructive_models = None 1596 1597 if allow_additive_models: 1598 expanded_additive_models = model_selector.expand_model_selections(allow_additive_models) 1599 else: 1600 expanded_additive_models = None 1601 1602 if backfill_models: 1603 backfill_models = model_selector.expand_model_selections(backfill_models) 1604 else: 1605 backfill_models = None 1606 1607 models_override: t.Optional[UniqueKeyDict[str, Model]] = None 1608 if select_models: 1609 try: 1610 models_override = model_selector.select_models( 1611 select_models, 1612 environment, 1613 fallback_env_name=create_from or c.PROD, 1614 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1615 ) 1616 except SQLMeshError as e: 1617 logger.exception(e) # ensure the full stack trace is logged 1618 raise PlanError( 1619 f"{e}\nCheck the SQLMesh log file for the full stack trace.\nIf the model has been fixed locally, please ensure that the --select-model expression includes it." 1620 ) 1621 if not backfill_models: 1622 # Only backfill selected models unless explicitly specified. 1623 backfill_models = model_selector.expand_model_selections(select_models) 1624 1625 expanded_restate_models = None 1626 if restate_models is not None: 1627 expanded_restate_models = model_selector.expand_model_selections(restate_models) 1628 1629 if (restate_models is not None and not expanded_restate_models) or ( 1630 backfill_models is not None and not backfill_models 1631 ): 1632 raise PlanError( 1633 "Selector did not return any models. Please check your model selection and try again." 1634 ) 1635 1636 if always_include_local_changes is None: 1637 # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes 1638 force_no_diff = restate_models is not None or ( 1639 backfill_models is not None and not backfill_models 1640 ) 1641 else: 1642 force_no_diff = not always_include_local_changes 1643 1644 snapshots = self._snapshots(models_override) 1645 context_diff = self._context_diff( 1646 environment or c.PROD, 1647 snapshots=snapshots, 1648 create_from=create_from, 1649 force_no_diff=force_no_diff, 1650 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1651 diff_rendered=diff_rendered, 1652 always_recreate_environment=self.config.plan.always_recreate_environment, 1653 ) 1654 modified_model_names = { 1655 *context_diff.modified_snapshots, 1656 *[s.name for s in context_diff.added], 1657 } 1658 1659 if ( 1660 is_dev 1661 and not include_unmodified 1662 and backfill_models is None 1663 and expanded_restate_models is None 1664 ): 1665 # Only backfill modified and added models. 1666 # This ensures that no models outside the impacted sub-DAG(s) will be backfilled unexpectedly. 1667 backfill_models = modified_model_names or None 1668 1669 max_interval_end_per_model = None 1670 default_start, default_end = None, None 1671 if not run: 1672 ignore_cron = False 1673 max_interval_end_per_model = self._get_max_interval_end_per_model( 1674 snapshots, backfill_models 1675 ) 1676 # If no end date is specified, use the max interval end from prod 1677 # to prevent unintended evaluation of the entire DAG. 1678 default_start, default_end = self._get_plan_default_start_end( 1679 snapshots, 1680 max_interval_end_per_model, 1681 backfill_models, 1682 modified_model_names, 1683 execution_time or now(), 1684 ) 1685 1686 # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model. 1687 self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values()) 1688 1689 start_override_per_model = self._calculate_start_override_per_model( 1690 min_intervals, 1691 start or default_start, 1692 end or default_end, 1693 execution_time or now(), 1694 backfill_models, 1695 snapshots, 1696 max_interval_end_per_model, 1697 ) 1698 1699 if not self.config.virtual_environment_mode.is_full: 1700 forward_only = True 1701 elif forward_only is None: 1702 forward_only = self.config.plan.forward_only 1703 1704 # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments 1705 # If we are not, then there is no point, because none of the data in dev environments can be promoted by definition 1706 restate_all_snapshots = ( 1707 expanded_restate_models is not None 1708 and not is_dev 1709 and self.config.virtual_environment_mode.is_full 1710 ) 1711 1712 return self.PLAN_BUILDER_TYPE( 1713 context_diff=context_diff, 1714 start=start, 1715 end=end, 1716 execution_time=execution_time, 1717 apply=self.apply, 1718 restate_models=expanded_restate_models, 1719 restate_all_snapshots=restate_all_snapshots, 1720 backfill_models=backfill_models, 1721 no_gaps=no_gaps, 1722 skip_backfill=skip_backfill, 1723 empty_backfill=empty_backfill, 1724 is_dev=is_dev, 1725 forward_only=forward_only, 1726 allow_destructive_models=expanded_destructive_models, 1727 allow_additive_models=expanded_additive_models, 1728 environment_ttl=environment_ttl, 1729 environment_suffix_target=self.config.environment_suffix_target, 1730 environment_catalog_mapping=self.environment_catalog_mapping, 1731 categorizer_config=categorizer_config or self.auto_categorize_changes, 1732 auto_categorization_enabled=not no_auto_categorization, 1733 effective_from=effective_from, 1734 include_unmodified=include_unmodified, 1735 default_start=default_start, 1736 default_end=default_end, 1737 enable_preview=( 1738 enable_preview if enable_preview is not None else self._plan_preview_enabled 1739 ), 1740 end_bounded=not run, 1741 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1742 start_override_per_model=start_override_per_model, 1743 end_override_per_model=max_interval_end_per_model, 1744 console=self.console, 1745 user_provided_flags=user_provided_flags, 1746 selected_models={ 1747 dbt_unique_id 1748 for model in model_selector.expand_model_selections(select_models or "*") 1749 if (dbt_unique_id := snapshots[model].node.dbt_unique_id) 1750 }, 1751 explain=explain or False, 1752 ignore_cron=ignore_cron or False, 1753 ) 1754 1755 def apply( 1756 self, 1757 plan: Plan, 1758 circuit_breaker: t.Optional[t.Callable[[], bool]] = None, 1759 ) -> None: 1760 """Applies a plan by pushing snapshots and backfilling data. 1761 1762 Given a plan, it pushes snapshots into the state sync and then uses the scheduler 1763 to backfill all models. 1764 1765 Args: 1766 plan: The plan to apply. 1767 circuit_breaker: An optional handler which checks if the apply should be aborted. 1768 """ 1769 if ( 1770 not plan.context_diff.has_changes 1771 and not plan.requires_backfill 1772 and not plan.has_unmodified_unpromoted 1773 ): 1774 return 1775 if plan.uncategorized: 1776 raise UncategorizedPlanError("Can't apply a plan with uncategorized changes.") 1777 1778 if plan.explain: 1779 explainer = PlanExplainer( 1780 state_reader=self.state_reader, 1781 default_catalog=self.default_catalog, 1782 console=self.console, 1783 ) 1784 explainer.evaluate(plan.to_evaluatable()) 1785 return 1786 1787 self.notification_target_manager.notify( 1788 NotificationEvent.APPLY_START, 1789 environment=plan.environment_naming_info.name, 1790 plan_id=plan.plan_id, 1791 ) 1792 try: 1793 self._apply(plan, circuit_breaker) 1794 except Exception as e: 1795 self.notification_target_manager.notify( 1796 NotificationEvent.APPLY_FAILURE, 1797 environment=plan.environment_naming_info.name, 1798 plan_id=plan.plan_id, 1799 exc=traceback.format_exc(), 1800 ) 1801 logger.info("Plan application failed.", exc_info=e) 1802 raise e 1803 self.notification_target_manager.notify( 1804 NotificationEvent.APPLY_END, 1805 environment=plan.environment_naming_info.name, 1806 plan_id=plan.plan_id, 1807 ) 1808 1809 @python_api_analytics 1810 def invalidate_environment(self, name: str, sync: bool = False) -> None: 1811 """Invalidates the target environment by setting its expiration timestamp to now. 1812 1813 Args: 1814 name: The name of the environment to invalidate. 1815 sync: If True, the call blocks until the environment is deleted. Otherwise, the environment will 1816 be deleted asynchronously by the janitor process. 1817 """ 1818 name = Environment.sanitize_name(name) 1819 self.state_sync.invalidate_environment(name) 1820 if sync: 1821 self._cleanup_environments() 1822 self.console.log_success(f"Environment '{name}' deleted.") 1823 else: 1824 self.console.log_success(f"Environment '{name}' invalidated.") 1825 1826 @python_api_analytics 1827 def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> bool: 1828 """Show a diff of the current context with a given environment. 1829 1830 Args: 1831 environment: The environment to diff against. 1832 detailed: Show the actual SQL differences if True. 1833 1834 Returns: 1835 True if there are changes, False otherwise. 1836 """ 1837 environment = environment or self.config.default_target_environment 1838 environment = Environment.sanitize_name(environment) 1839 context_diff = self._context_diff(environment) 1840 self.console.show_environment_difference_summary( 1841 context_diff, 1842 no_diff=not detailed, 1843 ) 1844 if context_diff.has_changes: 1845 self.console.show_model_difference_summary( 1846 context_diff, 1847 EnvironmentNamingInfo.from_environment_catalog_mapping( 1848 self.environment_catalog_mapping, 1849 name=environment, 1850 suffix_target=self.config.environment_suffix_target, 1851 normalize_name=context_diff.normalize_environment_name, 1852 ), 1853 self.default_catalog, 1854 no_diff=not detailed, 1855 ) 1856 return context_diff.has_changes 1857 1858 @python_api_analytics 1859 def table_diff( 1860 self, 1861 source: str, 1862 target: str, 1863 on: t.Optional[t.List[str] | exp.Expr] = None, 1864 skip_columns: t.Optional[t.List[str]] = None, 1865 select_models: t.Optional[t.Collection[str]] = None, 1866 where: t.Optional[str | exp.Expr] = None, 1867 limit: int = 20, 1868 show: bool = True, 1869 show_sample: bool = True, 1870 decimals: int = 3, 1871 skip_grain_check: bool = False, 1872 warn_grain_check: bool = False, 1873 temp_schema: t.Optional[str] = None, 1874 schema_diff_ignore_case: bool = False, 1875 **kwargs: t.Any, # catch-all to prevent an 'unexpected keyword argument' error if an table_diff extension passes in some extra arguments 1876 ) -> t.List[TableDiff]: 1877 """Show a diff between two tables. 1878 1879 Args: 1880 source: The source environment or table. 1881 target: The target environment or table. 1882 on: The join condition, table aliases must be "s" and "t" for source and target. 1883 If omitted, the table's grain will be used. 1884 skip_columns: The columns to skip when computing the table diff. 1885 select_models: The models or snapshots to use when environments are passed in. 1886 where: An optional where statement to filter results. 1887 limit: The limit of the sample dataframe. 1888 show: Show the table diff output in the console. 1889 show_sample: Show the sample dataframe in the console. Requires show=True. 1890 decimals: The number of decimal places to keep when comparing floating point columns. 1891 skip_grain_check: Skip check for rows that contain null or duplicate grains. 1892 temp_schema: The schema to use for temporary tables. 1893 1894 Returns: 1895 The list of TableDiff objects containing schema and summary differences. 1896 """ 1897 1898 if "|" in source or "|" in target: 1899 raise ConfigError( 1900 "Cross-database table diffing is available in Tobiko Cloud. Read more here: " 1901 "https://sqlmesh.readthedocs.io/en/stable/guides/tablediff/#diffing-tables-or-views-across-gateways" 1902 ) 1903 1904 table_diffs: t.List[TableDiff] = [] 1905 1906 # Diffs multiple or a single model across two environments 1907 if select_models: 1908 source_env = self.state_reader.get_environment(source) 1909 target_env = self.state_reader.get_environment(target) 1910 if not source_env: 1911 raise SQLMeshError(f"Could not find environment '{source}'") 1912 if not target_env: 1913 raise SQLMeshError(f"Could not find environment '{target}'") 1914 criteria = ", ".join(f"'{c}'" for c in select_models) 1915 try: 1916 selected_models = self._new_selector().expand_model_selections(select_models) 1917 if not selected_models: 1918 self.console.log_status_update( 1919 f"No models matched the selection criteria: {criteria}" 1920 ) 1921 except Exception as e: 1922 raise SQLMeshError(e) 1923 1924 models_to_diff: t.List[ 1925 t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Expr]] 1926 ] = [] 1927 models_without_grain: t.List[Model] = [] 1928 source_snapshots_to_name = { 1929 snapshot.name: snapshot for snapshot in source_env.snapshots 1930 } 1931 target_snapshots_to_name = { 1932 snapshot.name: snapshot for snapshot in target_env.snapshots 1933 } 1934 1935 for model_fqn in selected_models: 1936 model = self._models[model_fqn] 1937 adapter = self._get_engine_adapter(model.gateway) 1938 source_snapshot = source_snapshots_to_name.get(model.fqn) 1939 target_snapshot = target_snapshots_to_name.get(model.fqn) 1940 1941 if target_snapshot and source_snapshot: 1942 if (source_snapshot.fingerprint != target_snapshot.fingerprint) and ( 1943 (source_snapshot.version != target_snapshot.version) 1944 or source_snapshot.is_forward_only 1945 ): 1946 # Compare the virtual layer instead of the physical layer because the virtual layer is guaranteed to point 1947 # to the correct/active snapshot for the model in the specified environment, taking into account things like dev previews 1948 source = source_snapshot.qualified_view_name.for_environment( 1949 source_env.naming_info, adapter.dialect 1950 ) 1951 target = target_snapshot.qualified_view_name.for_environment( 1952 target_env.naming_info, adapter.dialect 1953 ) 1954 model_on = on or model.on 1955 if not model_on: 1956 models_without_grain.append(model) 1957 else: 1958 models_to_diff.append((model, adapter, source, target, model_on)) 1959 1960 if models_without_grain: 1961 model_names = "\n".join( 1962 f"─ {model.name} \n at '{model._path}'" for model in models_without_grain 1963 ) 1964 message = ( 1965 "SQLMesh doesn't know how to join the tables for the following models:\n" 1966 f"{model_names}\n\n" 1967 "Please specify a `grain` in each model definition. It must be unique and not null." 1968 ) 1969 if warn_grain_check: 1970 self.console.log_warning(message) 1971 else: 1972 raise SQLMeshError(message) 1973 1974 if models_to_diff: 1975 self.console.show_table_diff_details( 1976 [model[0].name for model in models_to_diff], 1977 ) 1978 1979 self.console.start_table_diff_progress(len(models_to_diff)) 1980 try: 1981 tasks_num = min(len(models_to_diff), self.concurrent_tasks) 1982 table_diffs = concurrent_apply_to_values( 1983 list(models_to_diff), 1984 lambda model_info: self._model_diff( 1985 model=model_info[0], 1986 adapter=model_info[1], 1987 source=model_info[2], 1988 target=model_info[3], 1989 on=model_info[4], 1990 source_alias=source_env.name, 1991 target_alias=target_env.name, 1992 limit=limit, 1993 decimals=decimals, 1994 skip_columns=skip_columns, 1995 where=where, 1996 show=show, 1997 temp_schema=temp_schema, 1998 skip_grain_check=skip_grain_check, 1999 schema_diff_ignore_case=schema_diff_ignore_case, 2000 ), 2001 tasks_num=tasks_num, 2002 ) 2003 self.console.stop_table_diff_progress(success=True) 2004 except: 2005 self.console.stop_table_diff_progress(success=False) 2006 raise 2007 elif selected_models: 2008 self.console.log_status_update( 2009 f"No models contain differences with the selection criteria: {criteria}" 2010 ) 2011 2012 else: 2013 table_diffs = [ 2014 self._table_diff( 2015 source=source, 2016 target=target, 2017 source_alias=source, 2018 target_alias=target, 2019 limit=limit, 2020 decimals=decimals, 2021 adapter=self.engine_adapter, 2022 on=on, 2023 skip_columns=skip_columns, 2024 where=where, 2025 schema_diff_ignore_case=schema_diff_ignore_case, 2026 ) 2027 ] 2028 2029 if show: 2030 self.console.show_table_diff(table_diffs, show_sample, skip_grain_check, temp_schema) 2031 2032 return table_diffs 2033 2034 def _model_diff( 2035 self, 2036 model: Model, 2037 adapter: EngineAdapter, 2038 source: str, 2039 target: str, 2040 source_alias: str, 2041 target_alias: str, 2042 limit: int, 2043 decimals: int, 2044 on: t.Optional[t.List[str] | exp.Expr] = None, 2045 skip_columns: t.Optional[t.List[str]] = None, 2046 where: t.Optional[str | exp.Expr] = None, 2047 show: bool = True, 2048 temp_schema: t.Optional[str] = None, 2049 skip_grain_check: bool = False, 2050 schema_diff_ignore_case: bool = False, 2051 ) -> TableDiff: 2052 self.console.start_table_diff_model_progress(model.name) 2053 2054 table_diff = self._table_diff( 2055 on=on, 2056 skip_columns=skip_columns, 2057 where=where, 2058 limit=limit, 2059 decimals=decimals, 2060 model=model, 2061 adapter=adapter, 2062 source=source, 2063 target=target, 2064 source_alias=source_alias, 2065 target_alias=target_alias, 2066 schema_diff_ignore_case=schema_diff_ignore_case, 2067 ) 2068 2069 if show: 2070 # Trigger row_diff in parallel execution so it's available for ordered display later 2071 table_diff.row_diff(temp_schema=temp_schema, skip_grain_check=skip_grain_check) 2072 2073 self.console.update_table_diff_progress(model.name) 2074 2075 return table_diff 2076 2077 def _table_diff( 2078 self, 2079 source: str, 2080 target: str, 2081 source_alias: str, 2082 target_alias: str, 2083 limit: int, 2084 decimals: int, 2085 adapter: EngineAdapter, 2086 on: t.Optional[t.List[str] | exp.Expr] = None, 2087 model: t.Optional[Model] = None, 2088 skip_columns: t.Optional[t.List[str]] = None, 2089 where: t.Optional[str | exp.Expr] = None, 2090 schema_diff_ignore_case: bool = False, 2091 ) -> TableDiff: 2092 if not on: 2093 raise SQLMeshError( 2094 "SQLMesh doesn't know how to join the two tables. Specify the `grains` in each model definition or pass join column names in separate `-o` flags." 2095 ) 2096 2097 return TableDiff( 2098 adapter=adapter.with_settings(execute_log_level=logger.getEffectiveLevel()), 2099 source=source, 2100 target=target, 2101 on=on, 2102 skip_columns=skip_columns, 2103 where=where, 2104 source_alias=source_alias, 2105 target_alias=target_alias, 2106 limit=limit, 2107 decimals=decimals, 2108 model_name=model.name if model else None, 2109 model_dialect=model.dialect if model else None, 2110 schema_diff_ignore_case=schema_diff_ignore_case, 2111 ) 2112 2113 @python_api_analytics 2114 def get_dag( 2115 self, select_models: t.Optional[t.Collection[str]] = None, **options: t.Any 2116 ) -> GraphHTML: 2117 """Gets an HTML object representation of the DAG. 2118 2119 Args: 2120 select_models: A list of model selection strings that should be included in the dag. 2121 Returns: 2122 An html object that renders the dag. 2123 """ 2124 dag = ( 2125 self.dag.prune(*self._new_selector().expand_model_selections(select_models)) 2126 if select_models 2127 else self.dag 2128 ) 2129 2130 nodes = {} 2131 edges: t.List[t.Dict] = [] 2132 2133 for node, deps in dag.graph.items(): 2134 nodes[node] = { 2135 "id": node, 2136 "label": node.split(".")[-1], 2137 "title": f"<span>{node}</span>", 2138 } 2139 edges.extend({"from": d, "to": node} for d in deps) 2140 2141 return GraphHTML( 2142 nodes, 2143 edges, 2144 options={ 2145 "height": "100%", 2146 "width": "100%", 2147 "interaction": {}, 2148 "layout": { 2149 "hierarchical": { 2150 "enabled": True, 2151 "nodeSpacing": 200, 2152 "sortMethod": "directed", 2153 }, 2154 }, 2155 "nodes": { 2156 "shape": "box", 2157 }, 2158 **options, 2159 }, 2160 ) 2161 2162 @python_api_analytics 2163 def render_dag(self, path: str, select_models: t.Optional[t.Collection[str]] = None) -> None: 2164 """Render the dag as HTML and save it to a file. 2165 2166 Args: 2167 path: filename to save the dag html to 2168 select_models: A list of model selection strings that should be included in the dag. 2169 """ 2170 file_path = Path(path) 2171 suffix = file_path.suffix 2172 if suffix != ".html": 2173 if suffix: 2174 get_console().log_warning( 2175 f"The extension {suffix} does not designate an html file. A file with a `.html` extension will be created instead." 2176 ) 2177 path = str(file_path.with_suffix(".html")) 2178 2179 with open(path, "w", encoding="utf-8") as file: 2180 file.write(str(self.get_dag(select_models))) 2181 2182 @python_api_analytics 2183 def create_test( 2184 self, 2185 model: str, 2186 input_queries: t.Dict[str, str], 2187 overwrite: bool = False, 2188 variables: t.Optional[t.Dict[str, str]] = None, 2189 path: t.Optional[str] = None, 2190 name: t.Optional[str] = None, 2191 include_ctes: bool = False, 2192 ) -> None: 2193 """Generate a unit test fixture for a given model. 2194 2195 Args: 2196 model: The model to test. 2197 input_queries: Mapping of model names to queries. Each model included in this mapping 2198 will be populated in the test based on the results of the corresponding query. 2199 overwrite: Whether to overwrite the existing test in case of a file path collision. 2200 When set to False, an error will be raised if there is such a collision. 2201 variables: Key-value pairs that will define variables needed by the model. 2202 path: The file path corresponding to the fixture, relative to the test directory. 2203 By default, the fixture will be created under the test directory and the file name 2204 will be inferred from the test's name. 2205 name: The name of the test. This is inferred from the model name by default. 2206 include_ctes: When true, CTE fixtures will also be generated. 2207 """ 2208 input_queries = { 2209 # The get_model here has two purposes: return normalized names & check for missing deps 2210 self.get_model(dep, raise_if_missing=True).fqn: query 2211 for dep, query in input_queries.items() 2212 } 2213 2214 try: 2215 model_to_test = self.get_model(model, raise_if_missing=True) 2216 test_adapter = self.test_connection_config.create_engine_adapter( 2217 register_comments_override=False 2218 ) 2219 2220 generate_test( 2221 model=model_to_test, 2222 input_queries=input_queries, 2223 models=self._models, 2224 engine_adapter=self._get_engine_adapter(model_to_test.gateway), 2225 test_engine_adapter=test_adapter, 2226 project_path=self.path, 2227 overwrite=overwrite, 2228 variables=variables, 2229 path=path, 2230 name=name, 2231 include_ctes=include_ctes, 2232 ) 2233 finally: 2234 if test_adapter: 2235 test_adapter.close() 2236 2237 @python_api_analytics 2238 def test( 2239 self, 2240 match_patterns: t.Optional[t.List[str]] = None, 2241 tests: t.Optional[t.List[str]] = None, 2242 verbosity: Verbosity = Verbosity.DEFAULT, 2243 preserve_fixtures: bool = False, 2244 stream: t.Optional[t.TextIO] = None, 2245 ) -> ModelTextTestResult: 2246 """Discover and run model tests""" 2247 if verbosity >= Verbosity.VERBOSE: 2248 import pandas as pd 2249 2250 pd.set_option("display.max_columns", None) 2251 2252 test_meta = self.select_tests(tests=tests, patterns=match_patterns) 2253 2254 result = run_tests( 2255 model_test_metadata=test_meta, 2256 models=self._models, 2257 config=self.config, 2258 selected_gateway=self.selected_gateway, 2259 dialect=self.default_dialect, 2260 verbosity=verbosity, 2261 preserve_fixtures=preserve_fixtures, 2262 stream=stream, 2263 default_catalog=self.default_catalog, 2264 default_catalog_dialect=self.config.dialect or "", 2265 ) 2266 2267 self.console.log_test_results( 2268 result, 2269 self.test_connection_config._engine_adapter.DIALECT, 2270 ) 2271 2272 return result 2273 2274 @python_api_analytics 2275 def audit( 2276 self, 2277 start: TimeLike, 2278 end: TimeLike, 2279 *, 2280 models: t.Optional[t.Iterator[str]] = None, 2281 execution_time: t.Optional[TimeLike] = None, 2282 ) -> bool: 2283 """Audit models. 2284 2285 Args: 2286 start: The start of the interval to audit. 2287 end: The end of the interval to audit. 2288 models: The models to audit. All models will be audited if not specified. 2289 execution_time: The date/time time reference to use for execution time. Defaults to now. 2290 2291 Returns: 2292 False if any of the audits failed, True otherwise. 2293 """ 2294 2295 snapshots = ( 2296 [self.get_snapshot(model, raise_if_missing=True) for model in models] 2297 if models 2298 else self.snapshots.values() 2299 ) 2300 2301 num_audits = sum(len(snapshot.node.audits_with_args) for snapshot in snapshots) 2302 self.console.log_status_update(f"Found {num_audits} audit(s).") 2303 2304 errors = [] 2305 skipped_count = 0 2306 for snapshot in snapshots: 2307 for audit_result in self.snapshot_evaluator.audit( 2308 snapshot=snapshot, 2309 start=start, 2310 end=end, 2311 execution_time=execution_time, 2312 snapshots=self.snapshots, 2313 ): 2314 audit_id = f"{audit_result.audit.name}" 2315 if audit_result.model: 2316 audit_id += f" on model {audit_result.model.name}" 2317 2318 if audit_result.skipped: 2319 self.console.log_status_update(f"{audit_id} ⏸️ SKIPPED.") 2320 skipped_count += 1 2321 elif audit_result.count: 2322 errors.append(audit_result) 2323 self.console.log_status_update( 2324 f"{audit_id} ❌ [red]FAIL [{audit_result.count}][/red]." 2325 ) 2326 else: 2327 self.console.log_status_update(f"{audit_id} ✅ [green]PASS[/green].") 2328 2329 self.console.log_status_update( 2330 f"\nFinished with {len(errors)} audit error{'' if len(errors) == 1 else 's'} " 2331 f"and {skipped_count} audit{'' if skipped_count == 1 else 's'} skipped." 2332 ) 2333 for error in errors: 2334 self.console.log_status_update( 2335 f"\nFailure in audit {error.audit.name} ({error.audit._path})." 2336 ) 2337 self.console.log_status_update(f"Got {error.count} results, expected 0.") 2338 if error.query: 2339 self.console.show_sql( 2340 f"{error.query.sql(dialect=self.snapshot_evaluator.adapter.dialect)}" 2341 ) 2342 2343 self.console.log_status_update("Done.") 2344 return not errors 2345 2346 @python_api_analytics 2347 def rewrite(self, sql: str, dialect: str = "") -> exp.Expr: 2348 """Rewrite a sql expression with semantic references into an executable query. 2349 2350 https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/ 2351 2352 Args: 2353 sql: The sql string to rewrite. 2354 dialect: The dialect of the sql string, defaults to the project dialect. 2355 2356 Returns: 2357 A SQLGlot expression with semantic references expanded. 2358 """ 2359 return rewrite( 2360 sql, 2361 graph=ReferenceGraph(self.models.values()), 2362 metrics=self._metrics, 2363 dialect=dialect or self.default_dialect, 2364 ) 2365 2366 @python_api_analytics 2367 def check_intervals( 2368 self, 2369 environment: t.Optional[str], 2370 no_signals: bool, 2371 select_models: t.Collection[str], 2372 start: t.Optional[TimeLike] = None, 2373 end: t.Optional[TimeLike] = None, 2374 ) -> t.Dict[Snapshot, SnapshotIntervals]: 2375 """Check intervals for a given environment. 2376 2377 Args: 2378 environment: The environment or prod if None. 2379 select_models: A list of model selection strings to show intervals for. 2380 start: The start of the intervals to check. 2381 end: The end of the intervals to check. 2382 """ 2383 2384 environment = environment or c.PROD 2385 env = self.state_reader.get_environment(environment) 2386 if not env: 2387 raise SQLMeshError(f"Environment '{environment}' was not found.") 2388 2389 snapshots = {k.name: v for k, v in self.state_sync.get_snapshots(env.snapshots).items()} 2390 2391 missing = { 2392 k.name: v 2393 for k, v in missing_intervals( 2394 snapshots.values(), start=start, end=end, execution_time=end 2395 ).items() 2396 } 2397 2398 if select_models: 2399 selected: t.Collection[str] = self._select_models_for_run( 2400 select_models, True, snapshots.values() 2401 ) 2402 else: 2403 selected = snapshots.keys() 2404 2405 results = {} 2406 execution_context = self.execution_context(snapshots=snapshots) 2407 2408 for fqn in selected: 2409 snapshot = snapshots[fqn] 2410 intervals = missing.get(fqn) or [] 2411 2412 results[snapshot] = SnapshotIntervals( 2413 snapshot.snapshot_id, 2414 intervals 2415 if no_signals 2416 else snapshot.check_ready_intervals(intervals, execution_context), 2417 ) 2418 2419 return results 2420 2421 @python_api_analytics 2422 def migrate(self) -> None: 2423 """Migrates SQLMesh to the current running version. 2424 2425 Please contact your SQLMesh administrator before doing this. 2426 """ 2427 self.notification_target_manager.notify(NotificationEvent.MIGRATION_START) 2428 self._load_materializations() 2429 try: 2430 self._new_state_sync().migrate( 2431 promoted_snapshots_only=self.config.migration.promoted_snapshots_only, 2432 ) 2433 except Exception as e: 2434 self.notification_target_manager.notify( 2435 NotificationEvent.MIGRATION_FAILURE, traceback.format_exc() 2436 ) 2437 raise e 2438 self.notification_target_manager.notify(NotificationEvent.MIGRATION_END) 2439 2440 @python_api_analytics 2441 def rollback(self) -> None: 2442 """Rolls back SQLMesh to the previous migration. 2443 2444 Please contact your SQLMesh administrator before doing this. This action cannot be undone. 2445 """ 2446 self._new_state_sync().rollback() 2447 2448 @python_api_analytics 2449 def create_external_models(self, strict: bool = False) -> None: 2450 """Create a file to document the schema of external models. 2451 2452 The external models file contains all columns and types of external models, allowing for more 2453 robust lineage, validation, and optimizations. 2454 2455 Args: 2456 strict: If True, raise an error if the external model is missing in the database. 2457 """ 2458 if not self._models: 2459 self.load(update_schemas=False) 2460 2461 for path, config in self.configs.items(): 2462 deprecated_yaml = path / c.EXTERNAL_MODELS_DEPRECATED_YAML 2463 2464 external_models_yaml = ( 2465 path / c.EXTERNAL_MODELS_YAML if not deprecated_yaml.exists() else deprecated_yaml 2466 ) 2467 2468 external_models_gateway: t.Optional[str] = self.gateway or self.config.default_gateway 2469 if not external_models_gateway: 2470 # can happen if there was no --gateway defined and the default_gateway is '' 2471 # which means that the single gateway syntax is being used which means there is 2472 # no named gateway which means we should not stamp `gateway:` on the external models 2473 external_models_gateway = None 2474 2475 create_external_models_file( 2476 path=external_models_yaml, 2477 models=UniqueKeyDict( 2478 "models", 2479 { 2480 fqn: model 2481 for fqn, model in self._models.items() 2482 if self.config_for_node(model) is config 2483 }, 2484 ), 2485 adapter=self.engine_adapter, 2486 state_reader=self.state_reader, 2487 dialect=config.model_defaults.dialect, 2488 gateway=external_models_gateway, 2489 max_workers=self.concurrent_tasks, 2490 strict=strict, 2491 ) 2492 2493 @python_api_analytics 2494 def print_info( 2495 self, skip_connection: bool = False, verbosity: Verbosity = Verbosity.DEFAULT 2496 ) -> None: 2497 """Prints information about connections, models, macros, etc. to the console.""" 2498 self.console.log_status_update(f"Models: {len(self.models)}") 2499 self.console.log_status_update(f"Macros: {len(self._macros) - len(macro.get_registry())}") 2500 2501 if skip_connection: 2502 return 2503 2504 if verbosity >= Verbosity.VERBOSE: 2505 self.console.log_status_update("") 2506 print_config(self.config.get_connection(self.gateway), self.console, "Connection") 2507 print_config( 2508 self.config.get_test_connection(self.gateway), self.console, "Test Connection" 2509 ) 2510 print_config( 2511 self.config.get_state_connection(self.gateway), self.console, "State Connection" 2512 ) 2513 2514 self._try_connection("data warehouse", self.engine_adapter.ping) 2515 state_connection = self.config.get_state_connection(self.gateway) 2516 if state_connection: 2517 self._try_connection("state backend", state_connection.connection_validator()) 2518 2519 @python_api_analytics 2520 def print_environment_names(self) -> None: 2521 """Prints all environment names along with expiry datetime.""" 2522 result = self._new_state_sync().get_environments_summary() 2523 if not result: 2524 raise SQLMeshError( 2525 "This project has no environments. Create an environment using the `sqlmesh plan` command." 2526 ) 2527 self.console.print_environments(result) 2528 2529 def close(self) -> None: 2530 """Releases all resources allocated by this context.""" 2531 if self._snapshot_evaluator: 2532 self._snapshot_evaluator.close() 2533 2534 if self._state_sync: 2535 self._state_sync.close() 2536 2537 def _run( 2538 self, 2539 environment: str, 2540 *, 2541 start: t.Optional[TimeLike], 2542 end: t.Optional[TimeLike], 2543 execution_time: t.Optional[TimeLike], 2544 ignore_cron: bool, 2545 select_models: t.Optional[t.Collection[str]], 2546 circuit_breaker: t.Optional[t.Callable[[], bool]], 2547 no_auto_upstream: bool, 2548 ) -> CompletionStatus: 2549 scheduler = self.scheduler(environment=environment) 2550 snapshots = scheduler.snapshots 2551 2552 if select_models is not None: 2553 select_models = self._select_models_for_run( 2554 select_models, no_auto_upstream, snapshots.values() 2555 ) 2556 2557 completion_status = scheduler.run( 2558 environment, 2559 start=start, 2560 end=end, 2561 execution_time=execution_time, 2562 ignore_cron=ignore_cron, 2563 circuit_breaker=circuit_breaker, 2564 selected_snapshots=select_models, 2565 auto_restatement_enabled=environment.lower() == c.PROD, 2566 run_environment_statements=True, 2567 ) 2568 2569 if completion_status.is_nothing_to_do: 2570 next_run_ready_msg = "" 2571 2572 next_ready_interval_start = get_next_model_interval_start(snapshots.values()) 2573 if next_ready_interval_start: 2574 utc_time = format_tz_datetime(next_ready_interval_start) 2575 local_time = format_tz_datetime(next_ready_interval_start, use_local_timezone=True) 2576 time_msg = local_time if local_time == utc_time else f"{local_time} ({utc_time})" 2577 next_run_ready_msg = f"\n\nNext run will be ready at {time_msg}." 2578 2579 self.console.log_status_update( 2580 f"No models are ready to run. Please wait until a model `cron` interval has elapsed.{next_run_ready_msg}" 2581 ) 2582 2583 return completion_status 2584 2585 def _apply(self, plan: Plan, circuit_breaker: t.Optional[t.Callable[[], bool]]) -> None: 2586 self._scheduler.create_plan_evaluator(self).evaluate( 2587 plan.to_evaluatable(), circuit_breaker=circuit_breaker 2588 ) 2589 2590 @python_api_analytics 2591 def table_name( 2592 self, model_name: str, environment: t.Optional[str] = None, prod: bool = False 2593 ) -> str: 2594 """Returns the name of the pysical table for the given model name in the target environment. 2595 2596 Args: 2597 model_name: The name of the model. 2598 environment: The environment to source the model version from. 2599 prod: If True, return the name of the physical table that will be used in production for the model version 2600 promoted in the target environment. 2601 2602 Returns: 2603 The name of the physical table. 2604 """ 2605 environment = environment or self.config.default_target_environment 2606 fqn = self._node_or_snapshot_to_fqn(model_name) 2607 target_env = self.state_reader.get_environment(environment) 2608 if not target_env: 2609 raise SQLMeshError(f"Environment '{environment}' was not found.") 2610 2611 snapshot_info = None 2612 for s in target_env.snapshots: 2613 if s.name == fqn: 2614 snapshot_info = s 2615 break 2616 if not snapshot_info: 2617 raise SQLMeshError( 2618 f"Model '{model_name}' was not found in environment '{environment}'." 2619 ) 2620 2621 if target_env.name == c.PROD or prod: 2622 return snapshot_info.table_name() 2623 2624 snapshots = self.state_reader.get_snapshots(target_env.snapshots) 2625 deployability_index = DeployabilityIndex.create(snapshots) 2626 2627 return snapshot_info.table_name( 2628 is_deployable=deployability_index.is_deployable(snapshot_info.snapshot_id) 2629 ) 2630 2631 def clear_caches(self) -> None: 2632 paths_to_remove = [path / c.CACHE for path in self.configs] 2633 paths_to_remove.append(self.cache_dir) 2634 2635 if IS_WINDOWS: 2636 paths_to_remove = [fix_windows_path(path) for path in paths_to_remove] 2637 2638 for path in paths_to_remove: 2639 if path.exists(): 2640 rmtree(path) 2641 2642 if isinstance(self._state_sync, CachingStateSync): 2643 self._state_sync.clear_cache() 2644 2645 def export_state( 2646 self, 2647 output_file: Path, 2648 environment_names: t.Optional[t.List[str]] = None, 2649 local_only: bool = False, 2650 confirm: bool = True, 2651 ) -> None: 2652 from sqlmesh.core.state_sync.export_import import export_state 2653 2654 # trigger a connection to the StateSync so we can fail early if there is a problem 2655 # note we still need to do this even if we are doing a local export so we know what 'versions' to write 2656 self.state_sync.get_versions(validate=True) 2657 2658 local_snapshots = self.snapshots if local_only else None 2659 2660 if self.console.start_state_export( 2661 output_file=output_file, 2662 gateway=self.selected_gateway, 2663 state_connection_config=self._state_connection_config, 2664 environment_names=environment_names, 2665 local_only=local_only, 2666 confirm=confirm, 2667 ): 2668 try: 2669 export_state( 2670 state_sync=self.state_sync, 2671 output_file=output_file, 2672 local_snapshots=local_snapshots, 2673 environment_names=environment_names, 2674 console=self.console, 2675 ) 2676 self.console.stop_state_export(success=True, output_file=output_file) 2677 except: 2678 self.console.stop_state_export(success=False, output_file=output_file) 2679 raise 2680 2681 def import_state(self, input_file: Path, clear: bool = False, confirm: bool = True) -> None: 2682 from sqlmesh.core.state_sync.export_import import import_state 2683 2684 if self.console.start_state_import( 2685 input_file=input_file, 2686 gateway=self.selected_gateway, 2687 state_connection_config=self._state_connection_config, 2688 clear=clear, 2689 confirm=confirm, 2690 ): 2691 try: 2692 import_state( 2693 state_sync=self.state_sync, 2694 input_file=input_file, 2695 clear=clear, 2696 console=self.console, 2697 ) 2698 self.console.stop_state_import(success=True, input_file=input_file) 2699 except: 2700 self.console.stop_state_import(success=False, input_file=input_file) 2701 raise 2702 2703 def _run_tests( 2704 self, verbosity: Verbosity = Verbosity.DEFAULT 2705 ) -> t.Tuple[ModelTextTestResult, str]: 2706 test_output_io = StringIO() 2707 result = self.test(stream=test_output_io, verbosity=verbosity) 2708 return result, test_output_io.getvalue() 2709 2710 def _run_plan_tests(self, skip_tests: bool = False) -> t.Optional[ModelTextTestResult]: 2711 if not skip_tests: 2712 result = self.test() 2713 if not result.wasSuccessful(): 2714 raise PlanError( 2715 "Cannot generate plan due to failing test(s). Fix test(s) and run again." 2716 ) 2717 return result 2718 return None 2719 2720 @property 2721 def _model_tables(self) -> t.Dict[str, str]: 2722 """Mapping of model name to physical table name. 2723 2724 If a snapshot has not been versioned yet, its view name will be returned. 2725 """ 2726 return { 2727 fqn: ( 2728 snapshot.table_name() 2729 if snapshot.version 2730 else snapshot.qualified_view_name.for_environment( 2731 EnvironmentNamingInfo.from_environment_catalog_mapping( 2732 self.environment_catalog_mapping, 2733 name=c.PROD, 2734 suffix_target=self.config.environment_suffix_target, 2735 ) 2736 ) 2737 ) 2738 for fqn, snapshot in self.snapshots.items() 2739 } 2740 2741 @cached_property 2742 def cache_dir(self) -> Path: 2743 if self.config.cache_dir: 2744 cache_path = Path(self.config.cache_dir) 2745 if cache_path.is_absolute(): 2746 return cache_path 2747 return self.path / cache_path 2748 2749 # Default to .cache directory in the project path 2750 return self.path / c.CACHE 2751 2752 @cached_property 2753 def engine_adapters(self) -> t.Dict[str, EngineAdapter]: 2754 """Returns all the engine adapters for the gateways defined in the configurations.""" 2755 adapters: t.Dict[str, EngineAdapter] = {self.selected_gateway: self.engine_adapter} 2756 for config in self.configs.values(): 2757 for gateway_name in config.gateways: 2758 if gateway_name not in adapters: 2759 connection = config.get_connection(gateway_name) 2760 adapter = connection.create_engine_adapter( 2761 concurrent_tasks=self.concurrent_tasks, 2762 ) 2763 adapters[gateway_name] = adapter 2764 return adapters 2765 2766 @cached_property 2767 def default_catalog_per_gateway(self) -> t.Dict[str, str]: 2768 """Returns the default catalogs for each engine adapter.""" 2769 return self._scheduler.get_default_catalog_per_gateway(self) 2770 2771 @property 2772 def concurrent_tasks(self) -> int: 2773 if self._concurrent_tasks is None: 2774 self._concurrent_tasks = self.connection_config.concurrent_tasks 2775 return self._concurrent_tasks 2776 2777 @cached_property 2778 def connection_config(self) -> ConnectionConfig: 2779 return self.config.get_connection(self.selected_gateway) 2780 2781 @cached_property 2782 def test_connection_config(self) -> ConnectionConfig: 2783 return self.config.get_test_connection( 2784 self.gateway, 2785 self.default_catalog, 2786 default_catalog_dialect=self.config.dialect, 2787 ) 2788 2789 @cached_property 2790 def environment_catalog_mapping(self) -> RegexKeyDict: 2791 engine_adapter = None 2792 try: 2793 engine_adapter = self.engine_adapter 2794 except Exception: 2795 pass 2796 2797 if ( 2798 self.config.environment_catalog_mapping 2799 and engine_adapter 2800 and not self.engine_adapter.catalog_support.is_multi_catalog_supported 2801 ): 2802 raise SQLMeshError( 2803 "Environment catalog mapping is only supported for engine adapters that support multiple catalogs" 2804 ) 2805 return self.config.environment_catalog_mapping 2806 2807 def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: 2808 if gateway: 2809 if adapter := self.engine_adapters.get(gateway): 2810 return adapter 2811 raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") 2812 return self.engine_adapter 2813 2814 def _snapshots( 2815 self, models_override: t.Optional[UniqueKeyDict[str, Model]] = None 2816 ) -> t.Dict[str, Snapshot]: 2817 nodes = {**(models_override or self._models), **self._standalone_audits} 2818 snapshots = self._nodes_to_snapshots(nodes) 2819 stored_snapshots = self.state_reader.get_snapshots(snapshots.values()) 2820 2821 unrestorable_snapshots = { 2822 snapshot 2823 for snapshot in stored_snapshots.values() 2824 if snapshot.name in nodes and snapshot.unrestorable 2825 } 2826 if unrestorable_snapshots: 2827 for snapshot in unrestorable_snapshots: 2828 logger.info( 2829 "Found a unrestorable snapshot %s. Restamping the model...", snapshot.name 2830 ) 2831 node = nodes[snapshot.name] 2832 nodes[snapshot.name] = node.copy( 2833 update={"stamp": f"revert to {snapshot.identifier}"} 2834 ) 2835 snapshots = self._nodes_to_snapshots(nodes) 2836 stored_snapshots = self.state_reader.get_snapshots(snapshots.values()) 2837 2838 for snapshot in stored_snapshots.values(): 2839 # Keep the original model instance to preserve the query cache. 2840 snapshot.node = snapshots[snapshot.name].node 2841 2842 return {name: stored_snapshots.get(s.snapshot_id, s) for name, s in snapshots.items()} 2843 2844 def _context_diff( 2845 self, 2846 environment: str, 2847 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 2848 create_from: t.Optional[str] = None, 2849 force_no_diff: bool = False, 2850 ensure_finalized_snapshots: bool = False, 2851 diff_rendered: bool = False, 2852 always_recreate_environment: bool = False, 2853 ) -> ContextDiff: 2854 environment = Environment.sanitize_name(environment) 2855 if force_no_diff: 2856 return ContextDiff.create_no_diff(environment, self.state_reader) 2857 2858 return ContextDiff.create( 2859 environment, 2860 snapshots=snapshots or self.snapshots, 2861 create_from=create_from or c.PROD, 2862 state_reader=self.state_reader, 2863 provided_requirements=self._requirements, 2864 excluded_requirements=self._excluded_requirements, 2865 ensure_finalized_snapshots=ensure_finalized_snapshots, 2866 diff_rendered=diff_rendered, 2867 environment_statements=self._environment_statements, 2868 gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, 2869 infer_python_dependencies=self.config.infer_python_dependencies, 2870 always_recreate_environment=always_recreate_environment, 2871 ) 2872 2873 def _destroy(self) -> bool: 2874 # Invalidate all environments, including prod 2875 for environment in self.state_reader.get_environments(): 2876 self.state_sync.invalidate_environment(name=environment.name, protect_prod=False) 2877 self.console.log_success(f"Environment '{environment.name}' invalidated.") 2878 2879 # Run janitor to clean up all objects 2880 self._run_janitor(ignore_ttl=True) 2881 2882 # Remove state tables, including backup tables 2883 self.state_sync.remove_state(including_backup=True) 2884 self.console.log_status_update("State tables removed.") 2885 2886 # Finally clear caches 2887 self.clear_caches() 2888 2889 return True 2890 2891 def _run_janitor(self, ignore_ttl: bool = False) -> None: 2892 current_ts = now_timestamp() 2893 2894 # Clean up expired environments by removing their views and schemas 2895 self._cleanup_environments(current_ts=current_ts) 2896 2897 delete_expired_snapshots( 2898 self.state_sync, 2899 self.snapshot_evaluator, 2900 current_ts=current_ts, 2901 ignore_ttl=ignore_ttl, 2902 console=self.console, 2903 batch_size=self.config.janitor.expired_snapshots_batch_size, 2904 ) 2905 self.state_sync.compact_intervals() 2906 2907 def _cleanup_environments(self, current_ts: t.Optional[int] = None) -> None: 2908 current_ts = current_ts or now_timestamp() 2909 2910 expired_environments_summaries = self.state_sync.get_expired_environments( 2911 current_ts=current_ts 2912 ) 2913 2914 for expired_env_summary in expired_environments_summaries: 2915 expired_env = self.state_reader.get_environment(expired_env_summary.name) 2916 2917 if expired_env: 2918 cleanup_expired_views( 2919 default_adapter=self.engine_adapter, 2920 engine_adapters=self.engine_adapters, 2921 environments=[expired_env], 2922 warn_on_delete_failure=self.config.janitor.warn_on_delete_failure, 2923 console=self.console, 2924 ) 2925 2926 self.state_sync.delete_expired_environments(current_ts=current_ts) 2927 2928 def _try_connection(self, connection_name: str, validator: t.Callable[[], None]) -> None: 2929 connection_name = connection_name.capitalize() 2930 try: 2931 validator() 2932 self.console.log_status_update(f"{connection_name} connection [green]succeeded[/green]") 2933 except Exception as ex: 2934 self.console.log_error(f"{connection_name} connection failed. {ex}") 2935 2936 def _new_state_sync(self) -> StateSync: 2937 return self._provided_state_sync or self._scheduler.create_state_sync(self) 2938 2939 def _new_selector( 2940 self, models: t.Optional[UniqueKeyDict[str, Model]] = None, dag: t.Optional[DAG[str]] = None 2941 ) -> Selector: 2942 return self._selector_cls( 2943 self.state_reader, 2944 models=models or self._models, 2945 context_path=self.path, 2946 dag=dag, 2947 default_catalog=self.default_catalog, 2948 dialect=self.default_dialect, 2949 cache_dir=self.cache_dir, 2950 ) 2951 2952 def _register_notification_targets(self) -> None: 2953 event_notifications = collections.defaultdict(set) 2954 for target in self.notification_targets: 2955 if target.is_configured: 2956 for event in target.notify_on: 2957 event_notifications[event].add(target) 2958 user_notification_targets = { 2959 user.username: set( 2960 target for target in user.notification_targets if target.is_configured 2961 ) 2962 for user in self.users 2963 } 2964 self.notification_target_manager = NotificationTargetManager( 2965 event_notifications, user_notification_targets, username=self.config.username 2966 ) 2967 2968 def _load_materializations(self) -> None: 2969 if not self._loaded: 2970 for loader in self._loaders: 2971 loader.load_materializations() 2972 2973 def _select_models_for_run( 2974 self, 2975 select_models: t.Collection[str], 2976 no_auto_upstream: bool, 2977 snapshots: t.Collection[Snapshot], 2978 ) -> t.Set[str]: 2979 models: UniqueKeyDict[str, Model] = UniqueKeyDict( 2980 "models", **{s.name: s.model for s in snapshots if s.is_model} 2981 ) 2982 dag: DAG[str] = DAG() 2983 for fqn, model in models.items(): 2984 dag.add(fqn, model.depends_on) 2985 model_selector = self._new_selector(models=models, dag=dag) 2986 result = set(model_selector.expand_model_selections(select_models)) 2987 if not no_auto_upstream: 2988 result = set(dag.subdag(*result)) 2989 return result 2990 2991 @cached_property 2992 def _project_type(self) -> str: 2993 project_types = { 2994 c.DBT if loader.__class__.__name__.lower().startswith(c.DBT) else c.NATIVE 2995 for loader in self._loaders 2996 } 2997 return c.HYBRID if len(project_types) > 1 else first(project_types) 2998 2999 def _nodes_to_snapshots(self, nodes: t.Dict[str, Node]) -> t.Dict[str, Snapshot]: 3000 snapshots: t.Dict[str, Snapshot] = {} 3001 fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {} 3002 3003 for node in nodes.values(): 3004 kwargs: t.Dict[str, t.Any] = {} 3005 if node.project in self._projects: 3006 config = self.config_for_node(node) 3007 kwargs["ttl"] = config.snapshot_ttl 3008 kwargs["table_naming_convention"] = config.physical_table_naming_convention 3009 3010 snapshot = Snapshot.from_node( 3011 node, 3012 nodes=nodes, 3013 cache=fingerprint_cache, 3014 **kwargs, 3015 ) 3016 snapshots[snapshot.name] = snapshot 3017 return snapshots 3018 3019 def _node_or_snapshot_to_fqn(self, node_or_snapshot: NodeOrSnapshot) -> str: 3020 if isinstance(node_or_snapshot, Snapshot): 3021 return node_or_snapshot.name 3022 if isinstance(node_or_snapshot, str) and not self.standalone_audits.get(node_or_snapshot): 3023 return normalize_model_name( 3024 node_or_snapshot, 3025 dialect=self.default_dialect, 3026 default_catalog=self.default_catalog, 3027 ) 3028 if not isinstance(node_or_snapshot, str): 3029 return node_or_snapshot.fqn 3030 return node_or_snapshot 3031 3032 @property 3033 def _plan_preview_enabled(self) -> bool: 3034 if self.config.plan.enable_preview is not None: 3035 return self.config.plan.enable_preview 3036 # It is dangerous to enable preview by default for dbt projects that rely on engines that don't support cloning. 3037 # Enabling previews in such cases can result in unintended full refreshes because dbt incremental models rely on 3038 # the maximum timestamp value in the target table. 3039 return self._project_type == c.NATIVE or self.engine_adapter.SUPPORTS_CLONING 3040 3041 def _get_plan_default_start_end( 3042 self, 3043 snapshots: t.Dict[str, Snapshot], 3044 max_interval_end_per_model: t.Dict[str, datetime], 3045 backfill_models: t.Optional[t.Set[str]], 3046 modified_model_names: t.Set[str], 3047 execution_time: t.Optional[TimeLike] = None, 3048 ) -> t.Tuple[t.Optional[int], t.Optional[int]]: 3049 # exclude seeds so their stale interval ends does not become the default plan end date 3050 # when they're the only ones that contain intervals in this plan 3051 non_seed_interval_ends = { 3052 model_fqn: end 3053 for model_fqn, end in max_interval_end_per_model.items() 3054 if model_fqn not in snapshots or not snapshots[model_fqn].is_seed 3055 } 3056 if not non_seed_interval_ends: 3057 return None, None 3058 3059 default_end = to_timestamp(max(non_seed_interval_ends.values())) 3060 default_start: t.Optional[int] = None 3061 # Infer the default start by finding the smallest interval start that corresponds to the default end. 3062 for model_name in backfill_models or modified_model_names or max_interval_end_per_model: 3063 if model_name not in snapshots: 3064 continue 3065 node = snapshots[model_name].node 3066 interval_unit = node.interval_unit 3067 default_start = min( 3068 default_start or sys.maxsize, 3069 to_timestamp( 3070 interval_unit.cron_prev( 3071 interval_unit.cron_floor( 3072 max_interval_end_per_model.get( 3073 model_name, node.cron_floor(default_end) 3074 ), 3075 ), 3076 estimate=True, 3077 ) 3078 ), 3079 ) 3080 3081 if execution_time and to_timestamp(default_end) > to_timestamp(execution_time): 3082 # the end date can't be in the future, which can happen if a specific `execution_time` is set and prod intervals 3083 # are newer than it 3084 default_end = to_timestamp(execution_time) 3085 3086 return default_start, default_end 3087 3088 def _calculate_start_override_per_model( 3089 self, 3090 min_intervals: t.Optional[int], 3091 plan_start: t.Optional[TimeLike], 3092 plan_end: t.Optional[TimeLike], 3093 plan_execution_time: TimeLike, 3094 backfill_model_fqns: t.Optional[t.Set[str]], 3095 snapshots_by_model_fqn: t.Dict[str, Snapshot], 3096 end_override_per_model: t.Optional[t.Dict[str, datetime]], 3097 ) -> t.Dict[str, datetime]: 3098 if not min_intervals or not backfill_model_fqns or not plan_start: 3099 # If there are no models to backfill, there are no intervals to consider for backfill, so we dont need to consider a minimum number 3100 # If the plan doesnt have a start date, all intervals are considered already so we dont need to consider a minimum number 3101 # If we dont have a minimum number of intervals to consider, then we dont need to adjust the start date on a per-model basis 3102 return {} 3103 3104 start_overrides: t.Dict[str, datetime] = {} 3105 end_override_per_model = end_override_per_model or {} 3106 3107 plan_execution_time_dt = to_datetime(plan_execution_time) 3108 plan_start_dt = to_datetime(plan_start, relative_base=plan_execution_time_dt) 3109 plan_end_dt = to_datetime( 3110 plan_end or plan_execution_time_dt, relative_base=plan_execution_time_dt 3111 ) 3112 3113 # we need to take the DAG into account so that parent models can be expanded to cover at least as much as their children 3114 # for example, A(hourly) <- B(daily) 3115 # if min_intervals=1, A would have 1 hour and B would have 1 day 3116 # but B depends on A so in order for B to have 1 valid day, A needs to be expanded to 24 hours 3117 backfill_dag: DAG[str] = DAG() 3118 for fqn in backfill_model_fqns: 3119 backfill_dag.add( 3120 fqn, 3121 [ 3122 p.name 3123 for p in snapshots_by_model_fqn[fqn].parents 3124 if p.name in backfill_model_fqns 3125 ], 3126 ) 3127 3128 # start from the leaf nodes and work back towards the root because the min_start at the root node is determined by the calculated starts in the leaf nodes 3129 reversed_dag = backfill_dag.reversed 3130 graph = reversed_dag.graph 3131 3132 for model_fqn in reversed_dag: 3133 # Get the earliest start from all immediate children of this snapshot 3134 # this works because topological ordering guarantees that they've already been visited 3135 # and we always set a start override 3136 min_child_start = min( 3137 [start_overrides[immediate_child_fqn] for immediate_child_fqn in graph[model_fqn]], 3138 default=plan_start_dt, 3139 ) 3140 3141 snapshot = snapshots_by_model_fqn.get(model_fqn) 3142 3143 if not snapshot: 3144 continue 3145 3146 starting_point = end_override_per_model.get(model_fqn, plan_end_dt) 3147 if node_end := snapshot.node.end: 3148 # if we dont do this, if the node end is a *date* (as opposed to a timestamp) 3149 # we end up incorrectly winding back an extra day 3150 node_end_dt = make_exclusive(node_end) 3151 3152 if node_end_dt < plan_end_dt: 3153 # if the model has an end date that has already elapsed, use that as a starting point for calculating min_intervals 3154 # instead of the plan end. If we use the plan end, we will return intervals in the future which are invalid 3155 starting_point = node_end_dt 3156 3157 snapshot_start = snapshot.node.cron_floor(starting_point) 3158 3159 for _ in range(min_intervals): 3160 # wind back the starting point by :min_intervals intervals to arrive at the minimum snapshot start date 3161 snapshot_start = snapshot.node.cron_prev(snapshot_start) 3162 3163 start_overrides[model_fqn] = min(min_child_start, snapshot_start) 3164 3165 return start_overrides 3166 3167 def _get_max_interval_end_per_model( 3168 self, snapshots: t.Dict[str, Snapshot], backfill_models: t.Optional[t.Set[str]] 3169 ) -> t.Dict[str, datetime]: 3170 models_for_interval_end = ( 3171 self._get_models_for_interval_end(snapshots, backfill_models) 3172 if backfill_models is not None 3173 else None 3174 ) 3175 return { 3176 model_fqn: to_datetime(ts) 3177 for model_fqn, ts in self.state_sync.max_interval_end_per_model( 3178 c.PROD, 3179 models=models_for_interval_end, 3180 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 3181 ).items() 3182 } 3183 3184 @staticmethod 3185 def _get_models_for_interval_end( 3186 snapshots: t.Dict[str, Snapshot], backfill_models: t.Set[str] 3187 ) -> t.Set[str]: 3188 models_for_interval_end = set() 3189 models_stack = list(backfill_models) 3190 while models_stack: 3191 next_model = models_stack.pop() 3192 if next_model not in snapshots: 3193 continue 3194 models_for_interval_end.add(next_model) 3195 models_stack.extend( 3196 s.name 3197 for s in snapshots[next_model].parents 3198 if s.name not in models_for_interval_end 3199 ) 3200 return models_for_interval_end 3201 3202 def lint_models( 3203 self, 3204 models: t.Optional[t.Iterable[t.Union[str, Model]]] = None, 3205 raise_on_error: bool = True, 3206 ) -> t.List[AnnotatedRuleViolation]: 3207 found_error = False 3208 3209 model_list = ( 3210 list(self.get_model(model, raise_if_missing=True) for model in models) 3211 if models 3212 else self.models.values() 3213 ) 3214 all_violations = [] 3215 for model in model_list: 3216 # Linter may be `None` if the context is not loaded yet 3217 if linter := self._linters.get(model.project): 3218 lint_violation, violations = ( 3219 linter.lint_model(model, self, console=self.console) or found_error 3220 ) 3221 if lint_violation: 3222 found_error = True 3223 all_violations.extend(violations) 3224 3225 if raise_on_error and found_error: 3226 raise LinterError( 3227 "Linter detected errors in the code. Please fix them before proceeding." 3228 ) 3229 3230 return all_violations 3231 3232 def select_tests( 3233 self, 3234 tests: t.Optional[t.List[str]] = None, 3235 patterns: t.Optional[t.List[str]] = None, 3236 ) -> t.List[ModelTestMetadata]: 3237 """Filter pre-loaded test metadata based on tests and patterns.""" 3238 3239 test_meta = self._model_test_metadata 3240 3241 if tests: 3242 filtered_tests = [] 3243 for test in tests: 3244 if "::" in test: 3245 if test in self._model_test_metadata_fully_qualified_name_index: 3246 filtered_tests.append( 3247 self._model_test_metadata_fully_qualified_name_index[test] 3248 ) 3249 else: 3250 test_path = Path(test) 3251 if test_path in self._model_test_metadata_path_index: 3252 filtered_tests.extend(self._model_test_metadata_path_index[test_path]) 3253 3254 test_meta = filtered_tests 3255 3256 if patterns: 3257 test_meta = filter_tests_by_patterns(test_meta, patterns) 3258 3259 return test_meta 3260 3261 3262class Context(GenericContext[Config]): 3263 CONFIG_TYPE = Config
167class BaseContext(abc.ABC): 168 """The base context which defines methods to execute a model.""" 169 170 @property 171 @abc.abstractmethod 172 def default_dialect(self) -> t.Optional[str]: 173 """Returns the default dialect.""" 174 175 @property 176 @abc.abstractmethod 177 def _model_tables(self) -> t.Dict[str, str]: 178 """Returns a mapping of model names to tables.""" 179 180 @property 181 @abc.abstractmethod 182 def engine_adapter(self) -> EngineAdapter: 183 """Returns an engine adapter.""" 184 185 @property 186 def spark(self) -> t.Optional[PySparkSession]: 187 """Returns the spark session if it exists.""" 188 return self.engine_adapter.spark 189 190 @property 191 def snowpark(self) -> t.Optional[SnowparkSession]: 192 """Returns the snowpark session if it exists.""" 193 return self.engine_adapter.snowpark 194 195 @property 196 def bigframe(self) -> t.Optional[BigframeSession]: 197 """Returns the bigframe session if it exists.""" 198 return self.engine_adapter.bigframe 199 200 @property 201 def default_catalog(self) -> t.Optional[str]: 202 raise NotImplementedError 203 204 def table(self, model_name: str) -> str: 205 get_console().log_warning( 206 "The SQLMesh context's `table` method is deprecated and will be removed " 207 "in a future release. Please use the `resolve_table` method instead." 208 ) 209 return self.resolve_table(model_name) 210 211 def resolve_table(self, model_name: str) -> str: 212 """Gets the physical table name for a given model. 213 214 Args: 215 model_name: The model name. 216 217 Returns: 218 The physical table name. 219 """ 220 model_name = normalize_model_name(model_name, self.default_catalog, self.default_dialect) 221 222 if model_name not in self._model_tables: 223 model_name_list = "\n".join(list(self._model_tables)) 224 logger.debug( 225 f"'{model_name}' not found in model to table mapping. Available model names: \n{model_name_list}" 226 ) 227 raise SQLMeshError( 228 f"Unable to find a table mapping for model '{model_name}'. Has it been spelled correctly?" 229 ) 230 231 # We generate SQL for the default dialect because the table name may be used in a 232 # fetchdf call and so the quotes need to be correct (eg. backticks for bigquery) 233 return parse_one(self._model_tables[model_name]).sql( 234 dialect=self.default_dialect, identify=True 235 ) 236 237 def fetchdf( 238 self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False 239 ) -> pd.DataFrame: 240 """Fetches a dataframe given a sql string or sqlglot expression. 241 242 Args: 243 query: SQL string or sqlglot expression. 244 quote_identifiers: Whether to quote all identifiers in the query. 245 246 Returns: 247 The default dataframe is Pandas, but for Spark a PySpark dataframe is returned. 248 """ 249 return self.engine_adapter.fetchdf(query, quote_identifiers=quote_identifiers) 250 251 def fetch_pyspark_df( 252 self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False 253 ) -> PySparkDataFrame: 254 """Fetches a PySpark dataframe given a sql string or sqlglot expression. 255 256 Args: 257 query: SQL string or sqlglot expression. 258 quote_identifiers: Whether to quote all identifiers in the query. 259 260 Returns: 261 A PySpark dataframe. 262 """ 263 return self.engine_adapter.fetch_pyspark_df(query, quote_identifiers=quote_identifiers)
The base context which defines methods to execute a model.
170 @property 171 @abc.abstractmethod 172 def default_dialect(self) -> t.Optional[str]: 173 """Returns the default dialect."""
Returns the default dialect.
180 @property 181 @abc.abstractmethod 182 def engine_adapter(self) -> EngineAdapter: 183 """Returns an engine adapter."""
Returns an engine adapter.
185 @property 186 def spark(self) -> t.Optional[PySparkSession]: 187 """Returns the spark session if it exists.""" 188 return self.engine_adapter.spark
Returns the spark session if it exists.
190 @property 191 def snowpark(self) -> t.Optional[SnowparkSession]: 192 """Returns the snowpark session if it exists.""" 193 return self.engine_adapter.snowpark
Returns the snowpark session if it exists.
195 @property 196 def bigframe(self) -> t.Optional[BigframeSession]: 197 """Returns the bigframe session if it exists.""" 198 return self.engine_adapter.bigframe
Returns the bigframe session if it exists.
211 def resolve_table(self, model_name: str) -> str: 212 """Gets the physical table name for a given model. 213 214 Args: 215 model_name: The model name. 216 217 Returns: 218 The physical table name. 219 """ 220 model_name = normalize_model_name(model_name, self.default_catalog, self.default_dialect) 221 222 if model_name not in self._model_tables: 223 model_name_list = "\n".join(list(self._model_tables)) 224 logger.debug( 225 f"'{model_name}' not found in model to table mapping. Available model names: \n{model_name_list}" 226 ) 227 raise SQLMeshError( 228 f"Unable to find a table mapping for model '{model_name}'. Has it been spelled correctly?" 229 ) 230 231 # We generate SQL for the default dialect because the table name may be used in a 232 # fetchdf call and so the quotes need to be correct (eg. backticks for bigquery) 233 return parse_one(self._model_tables[model_name]).sql( 234 dialect=self.default_dialect, identify=True 235 )
Gets the physical table name for a given model.
Arguments:
- model_name: The model name.
Returns:
The physical table name.
237 def fetchdf( 238 self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False 239 ) -> pd.DataFrame: 240 """Fetches a dataframe given a sql string or sqlglot expression. 241 242 Args: 243 query: SQL string or sqlglot expression. 244 quote_identifiers: Whether to quote all identifiers in the query. 245 246 Returns: 247 The default dataframe is Pandas, but for Spark a PySpark dataframe is returned. 248 """ 249 return self.engine_adapter.fetchdf(query, quote_identifiers=quote_identifiers)
Fetches a dataframe given a sql string or sqlglot expression.
Arguments:
- query: SQL string or sqlglot expression.
- quote_identifiers: Whether to quote all identifiers in the query.
Returns:
The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
251 def fetch_pyspark_df( 252 self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False 253 ) -> PySparkDataFrame: 254 """Fetches a PySpark dataframe given a sql string or sqlglot expression. 255 256 Args: 257 query: SQL string or sqlglot expression. 258 quote_identifiers: Whether to quote all identifiers in the query. 259 260 Returns: 261 A PySpark dataframe. 262 """ 263 return self.engine_adapter.fetch_pyspark_df(query, quote_identifiers=quote_identifiers)
Fetches a PySpark dataframe given a sql string or sqlglot expression.
Arguments:
- query: SQL string or sqlglot expression.
- quote_identifiers: Whether to quote all identifiers in the query.
Returns:
A PySpark dataframe.
266class ExecutionContext(BaseContext): 267 """The minimal context needed to execute a model. 268 269 Args: 270 engine_adapter: The engine adapter to execute queries against. 271 snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations. 272 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 273 """ 274 275 def __init__( 276 self, 277 engine_adapter: EngineAdapter, 278 snapshots: t.Dict[str, Snapshot], 279 deployability_index: t.Optional[DeployabilityIndex] = None, 280 default_dialect: t.Optional[str] = None, 281 default_catalog: t.Optional[str] = None, 282 is_restatement: t.Optional[bool] = None, 283 parent_intervals: t.Optional[Intervals] = None, 284 variables: t.Optional[t.Dict[str, t.Any]] = None, 285 blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, 286 ): 287 self.snapshots = snapshots 288 self.deployability_index = deployability_index 289 self._engine_adapter = engine_adapter 290 self._default_catalog = default_catalog 291 self._default_dialect = default_dialect 292 self._variables = variables or {} 293 self._blueprint_variables = blueprint_variables or {} 294 self._is_restatement = is_restatement 295 self._parent_intervals = parent_intervals 296 297 @property 298 def default_dialect(self) -> t.Optional[str]: 299 return self._default_dialect 300 301 @property 302 def engine_adapter(self) -> EngineAdapter: 303 """Returns an engine adapter.""" 304 return self._engine_adapter 305 306 @cached_property 307 def _model_tables(self) -> t.Dict[str, str]: 308 """Returns a mapping of model names to tables.""" 309 return to_table_mapping(self.snapshots.values(), self.deployability_index) 310 311 @property 312 def default_catalog(self) -> t.Optional[str]: 313 return self._default_catalog 314 315 @property 316 def gateway(self) -> t.Optional[str]: 317 """Returns the gateway name.""" 318 return self.var(c.GATEWAY) 319 320 @property 321 def is_restatement(self) -> t.Optional[bool]: 322 return self._is_restatement 323 324 @property 325 def parent_intervals(self) -> t.Optional[Intervals]: 326 return self._parent_intervals 327 328 def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: 329 """Returns a variable value.""" 330 return self._variables.get(var_name.lower(), default) 331 332 def blueprint_var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: 333 """Returns a blueprint variable value.""" 334 return self._blueprint_variables.get(var_name.lower(), default) 335 336 def with_variables( 337 self, 338 variables: t.Dict[str, t.Any], 339 blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, 340 ) -> ExecutionContext: 341 """Returns a new ExecutionContext with additional variables.""" 342 return ExecutionContext( 343 self._engine_adapter, 344 self.snapshots, 345 self.deployability_index, 346 self._default_dialect, 347 self._default_catalog, 348 self._is_restatement, 349 variables=variables, 350 blueprint_variables=blueprint_variables, 351 )
The minimal context needed to execute a model.
Arguments:
- engine_adapter: The engine adapter to execute queries against.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- deployability_index: Determines snapshots that are deployable in the context of this evaluation.
275 def __init__( 276 self, 277 engine_adapter: EngineAdapter, 278 snapshots: t.Dict[str, Snapshot], 279 deployability_index: t.Optional[DeployabilityIndex] = None, 280 default_dialect: t.Optional[str] = None, 281 default_catalog: t.Optional[str] = None, 282 is_restatement: t.Optional[bool] = None, 283 parent_intervals: t.Optional[Intervals] = None, 284 variables: t.Optional[t.Dict[str, t.Any]] = None, 285 blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, 286 ): 287 self.snapshots = snapshots 288 self.deployability_index = deployability_index 289 self._engine_adapter = engine_adapter 290 self._default_catalog = default_catalog 291 self._default_dialect = default_dialect 292 self._variables = variables or {} 293 self._blueprint_variables = blueprint_variables or {} 294 self._is_restatement = is_restatement 295 self._parent_intervals = parent_intervals
301 @property 302 def engine_adapter(self) -> EngineAdapter: 303 """Returns an engine adapter.""" 304 return self._engine_adapter
Returns an engine adapter.
315 @property 316 def gateway(self) -> t.Optional[str]: 317 """Returns the gateway name.""" 318 return self.var(c.GATEWAY)
Returns the gateway name.
328 def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: 329 """Returns a variable value.""" 330 return self._variables.get(var_name.lower(), default)
Returns a variable value.
332 def blueprint_var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]: 333 """Returns a blueprint variable value.""" 334 return self._blueprint_variables.get(var_name.lower(), default)
Returns a blueprint variable value.
336 def with_variables( 337 self, 338 variables: t.Dict[str, t.Any], 339 blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None, 340 ) -> ExecutionContext: 341 """Returns a new ExecutionContext with additional variables.""" 342 return ExecutionContext( 343 self._engine_adapter, 344 self.snapshots, 345 self.deployability_index, 346 self._default_dialect, 347 self._default_catalog, 348 self._is_restatement, 349 variables=variables, 350 blueprint_variables=blueprint_variables, 351 )
Returns a new ExecutionContext with additional variables.
Inherited Members
354class GenericContext(BaseContext, t.Generic[C]): 355 """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks. 356 357 Args: 358 notification_targets: The notification target to use. Defaults to what is defined in config. 359 paths: The directories containing SQLMesh files. 360 config: A Config object or the name of a Config object in config.py. 361 connection: The name of the connection. If not specified the first connection as it appears 362 in configuration will be used. 363 test_connection: The name of the connection to use for tests. If not specified the first 364 connection as it appears in configuration will be used. 365 concurrent_tasks: The maximum number of tasks that can use the connection concurrently. 366 load: Whether or not to automatically load all models and macros (default True). 367 console: The rich instance used for printing out CLI command results. 368 users: A list of users to make known to SQLMesh. 369 """ 370 371 CONFIG_TYPE: t.Type[C] 372 """The type of config object to use (default: Config).""" 373 374 PLAN_BUILDER_TYPE = PlanBuilder 375 """The type of plan builder object to use (default: PlanBuilder).""" 376 377 def __init__( 378 self, 379 notification_targets: t.Optional[t.List[NotificationTarget]] = None, 380 state_sync: t.Optional[StateSync] = None, 381 paths: t.Union[str | Path, t.Iterable[str | Path]] = "", 382 config: t.Optional[t.Union[C, str, t.Dict[Path, C]]] = None, 383 gateway: t.Optional[str] = None, 384 concurrent_tasks: t.Optional[int] = None, 385 loader: t.Optional[t.Type[Loader]] = None, 386 load: bool = True, 387 users: t.Optional[t.List[User]] = None, 388 config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None, 389 selector: t.Optional[t.Type[Selector]] = None, 390 ): 391 self.configs = ( 392 config 393 if isinstance(config, dict) 394 else load_configs(config, self.CONFIG_TYPE, paths, **(config_loader_kwargs or {})) 395 ) 396 self._projects = {config.project for config in self.configs.values()} 397 self.dag: DAG[str] = DAG() 398 self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") 399 self._audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("audits") 400 self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( 401 "standaloneaudits" 402 ) 403 self._model_test_metadata: t.List[ModelTestMetadata] = [] 404 self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {} 405 self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {} 406 self._models_with_tests: t.Set[str] = set() 407 408 self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") 409 self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") 410 self._jinja_macros = JinjaMacroRegistry() 411 self._requirements: t.Dict[str, str] = {} 412 self._environment_statements: t.List[EnvironmentStatements] = [] 413 self._excluded_requirements: t.Set[str] = set() 414 self._engine_adapter: t.Optional[EngineAdapter] = None 415 self._linters: t.Dict[str, Linter] = {} 416 self._loaded: bool = False 417 self._selector_cls = selector or NativeSelector 418 419 self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items()))) 420 421 self._all_dialects: t.Set[str] = {self.config.dialect or ""} 422 423 if self.config.disable_anonymized_analytics: 424 analytics.disable_analytics() 425 426 self.gateway = gateway 427 self._scheduler = self.config.get_scheduler(self.gateway) 428 self.environment_ttl = self.config.environment_ttl 429 self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments) 430 self.auto_categorize_changes = self.config.plan.auto_categorize_changes 431 self.selected_gateway = (gateway or self.config.default_gateway_name).lower() 432 433 gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults 434 if gw_model_defaults: 435 # Merge global model defaults with the selected gateway's, if it's overriden 436 global_defaults = self.config.model_defaults.model_dump(exclude_unset=True) 437 gateway_defaults = gw_model_defaults.model_dump(exclude_unset=True) 438 439 self.config.model_defaults = ModelDefaultsConfig( 440 **{**global_defaults, **gateway_defaults} 441 ) 442 443 # This allows overriding the default dialect's normalization strategy, so for example 444 # one can do `dialect="duckdb,normalization_strategy=lowercase"` and this will be 445 # applied to the DuckDB dialect globally 446 if "normalization_strategy" in str(self.config.dialect): 447 dialect = Dialect.get_or_raise(self.config.dialect) 448 type(dialect).NORMALIZATION_STRATEGY = dialect.normalization_strategy 449 450 self._loaders = [ 451 (loader or config.loader)(self, path, **config.loader_kwargs) 452 for path, config in self.configs.items() 453 ] 454 455 self._concurrent_tasks = concurrent_tasks 456 self._state_connection_config = ( 457 self.config.get_state_connection(self.gateway) or self.connection_config 458 ) 459 460 self._snapshot_evaluator: t.Optional[SnapshotEvaluator] = None 461 462 self.console = get_console() 463 setattr(self.console, "dialect", self.config.dialect) 464 465 self._provided_state_sync: t.Optional[StateSync] = state_sync 466 self._state_sync: t.Optional[StateSync] = None 467 468 # Should we dedupe notification_targets? If so how? 469 self.notification_targets = (notification_targets or []) + self.config.notification_targets 470 self.users = (users or []) + self.config.users 471 self.users = list({user.username: user for user in self.users}.values()) 472 self._register_notification_targets() 473 474 if load: 475 self.load() 476 477 @property 478 def default_dialect(self) -> t.Optional[str]: 479 return self.config.dialect 480 481 @property 482 def engine_adapter(self) -> EngineAdapter: 483 """Returns the default engine adapter.""" 484 if self._engine_adapter is None: 485 self._engine_adapter = self.connection_config.create_engine_adapter() 486 return self._engine_adapter 487 488 @property 489 def snapshot_evaluator(self) -> SnapshotEvaluator: 490 if not self._snapshot_evaluator: 491 self._snapshot_evaluator = SnapshotEvaluator( 492 { 493 gateway: adapter.with_settings(execute_log_level=logging.INFO) 494 for gateway, adapter in self.engine_adapters.items() 495 }, 496 ddl_concurrent_tasks=self.concurrent_tasks, 497 selected_gateway=self.selected_gateway, 498 ) 499 return self._snapshot_evaluator 500 501 def execution_context( 502 self, 503 deployability_index: t.Optional[DeployabilityIndex] = None, 504 engine_adapter: t.Optional[EngineAdapter] = None, 505 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 506 ) -> ExecutionContext: 507 """Returns an execution context.""" 508 return ExecutionContext( 509 engine_adapter=engine_adapter or self.engine_adapter, 510 snapshots=snapshots or self.snapshots, 511 deployability_index=deployability_index, 512 default_dialect=self.default_dialect, 513 default_catalog=self.default_catalog, 514 ) 515 516 @python_api_analytics 517 def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model: 518 """Update or insert a model. 519 520 The context's models dictionary will be updated to include these changes. 521 522 Args: 523 model: Model name or instance to update. 524 kwargs: The kwargs to update the model with. 525 526 Returns: 527 A new instance of the updated or inserted model. 528 """ 529 model = self.get_model(model, raise_if_missing=True) 530 if not model.enabled: 531 raise SQLMeshError(f"The disabled model '{model.name}' cannot be upserted") 532 path = model._path 533 534 model = model.copy(update=kwargs) 535 model._path = path 536 537 self.dag.add(model.fqn, model.depends_on) 538 539 self._models.update( 540 { 541 model.fqn: model, 542 # bust the fingerprint cache for all downstream models 543 **{fqn: self._models[fqn].copy() for fqn in self.dag.downstream(model.fqn)}, 544 } 545 ) 546 547 update_model_schemas( 548 self.dag, 549 models=self._models, 550 cache_dir=self.cache_dir, 551 ) 552 553 if model.dialect: 554 self._all_dialects.add(model.dialect) 555 556 model.validate_definition() 557 558 return model 559 560 def scheduler( 561 self, 562 environment: t.Optional[str] = None, 563 snapshot_evaluator: t.Optional[SnapshotEvaluator] = None, 564 ) -> Scheduler: 565 """Returns the built-in scheduler. 566 567 Args: 568 environment: The target environment to source model snapshots from, or None 569 if snapshots should be sourced from the currently loaded local state. 570 571 Returns: 572 The built-in scheduler instance. 573 """ 574 snapshots: t.Iterable[Snapshot] 575 if environment is not None: 576 stored_environment = self.state_sync.get_environment(environment) 577 if stored_environment is None: 578 raise ConfigError(f"Environment '{environment}' was not found.") 579 snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values() 580 else: 581 snapshots = self.snapshots.values() 582 583 if not snapshots: 584 raise ConfigError("No models were found") 585 586 return self.create_scheduler(snapshots, snapshot_evaluator or self.snapshot_evaluator) 587 588 def create_scheduler( 589 self, snapshots: t.Iterable[Snapshot], snapshot_evaluator: SnapshotEvaluator 590 ) -> Scheduler: 591 """Creates the built-in scheduler. 592 593 Args: 594 snapshots: The snapshots to schedule. 595 596 Returns: 597 The built-in scheduler instance. 598 """ 599 return Scheduler( 600 snapshots, 601 snapshot_evaluator, 602 self.state_sync, 603 default_catalog=self.default_catalog, 604 max_workers=self.concurrent_tasks, 605 console=self.console, 606 notification_target_manager=self.notification_target_manager, 607 ) 608 609 @property 610 def state_sync(self) -> StateSync: 611 if not self._state_sync: 612 self._state_sync = self._new_state_sync() 613 614 if self._state_sync.get_versions(validate=False).schema_version == 0: 615 self.console.log_status_update("Initializing new project state...") 616 self._state_sync.migrate() 617 self._state_sync.get_versions() 618 self._state_sync = CachingStateSync(self._state_sync) # type: ignore 619 return self._state_sync 620 621 @property 622 def state_reader(self) -> StateReader: 623 return self.state_sync 624 625 def refresh(self) -> None: 626 """Refresh all models that have been updated.""" 627 if any(loader.reload_needed() for loader in self._loaders): 628 self.load() 629 630 def load(self, update_schemas: bool = True) -> GenericContext[C]: 631 """Load all files in the context's path.""" 632 load_start_ts = time.perf_counter() 633 634 loaded_projects = [loader.load() for loader in self._loaders] 635 636 self.dag = DAG() 637 self._standalone_audits.clear() 638 self._audits.clear() 639 self._macros.clear() 640 self._models.clear() 641 self._metrics.clear() 642 self._requirements.clear() 643 self._excluded_requirements.clear() 644 self._linters.clear() 645 self._environment_statements = [] 646 self._model_test_metadata.clear() 647 self._model_test_metadata_path_index.clear() 648 self._model_test_metadata_fully_qualified_name_index.clear() 649 self._models_with_tests.clear() 650 651 for loader, project in zip(self._loaders, loaded_projects): 652 self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) 653 self._macros.update(project.macros) 654 self._models.update(project.models) 655 self._metrics.update(project.metrics) 656 self._audits.update(project.audits) 657 self._standalone_audits.update(project.standalone_audits) 658 self._requirements.update(project.requirements) 659 self._excluded_requirements.update(project.excluded_requirements) 660 self._environment_statements.extend(project.environment_statements) 661 662 self._model_test_metadata.extend(project.model_test_metadata) 663 for metadata in project.model_test_metadata: 664 if metadata.path not in self._model_test_metadata_path_index: 665 self._model_test_metadata_path_index[metadata.path] = [] 666 self._model_test_metadata_path_index[metadata.path].append(metadata) 667 self._model_test_metadata_fully_qualified_name_index[ 668 metadata.fully_qualified_test_name 669 ] = metadata 670 self._models_with_tests.add(metadata.model_name) 671 672 config = loader.config 673 self._linters[config.project] = Linter.from_rules( 674 BUILTIN_RULES.union(project.user_rules), config.linter 675 ) 676 677 # Load environment statements from state for projects not in current load 678 if any(self._projects): 679 prod = self.state_reader.get_environment(c.PROD) 680 if prod: 681 existing_statements = self.state_reader.get_environment_statements(c.PROD) 682 for stmt in existing_statements: 683 if stmt.project and stmt.project not in self._projects: 684 self._environment_statements.append(stmt) 685 686 uncached = set() 687 688 if any(self._projects): 689 prod = self.state_reader.get_environment(c.PROD) 690 691 if prod: 692 for snapshot in self.state_reader.get_snapshots(prod.snapshots).values(): 693 if snapshot.node.project in self._projects: 694 uncached.add(snapshot.name) 695 else: 696 local_store = self._standalone_audits if snapshot.is_audit else self._models 697 if snapshot.name in local_store: 698 uncached.add(snapshot.name) 699 else: 700 local_store[snapshot.name] = snapshot.node # type: ignore 701 702 for model in self._models.values(): 703 self.dag.add(model.fqn, model.depends_on) 704 705 if update_schemas: 706 for fqn in self.dag: 707 model = self._models.get(fqn) # type: ignore 708 709 if not model or fqn in uncached: 710 continue 711 712 # make a copy of remote models that depend on local models or in the downstream chain 713 # without this, a SELECT * FROM local will not propogate properly because the downstream 714 # model will get mutated (schema changes) but the object is the same as the remote cache 715 if any(dep in uncached for dep in model.depends_on): 716 uncached.add(fqn) 717 self._models.update({fqn: model.copy(update={"mapping_schema": {}})}) 718 continue 719 720 update_model_schemas( 721 self.dag, 722 models=self._models, 723 cache_dir=self.cache_dir, 724 ) 725 726 models = self.models.values() 727 for model in models: 728 # The model definition can be validated correctly only after the schema is set. 729 model.validate_definition() 730 731 duplicates = set(self._models) & set(self._standalone_audits) 732 if duplicates: 733 raise ConfigError( 734 f"Models and Standalone audits cannot have the same name: {duplicates}" 735 ) 736 737 self._all_dialects = {m.dialect for m in self._models.values() if m.dialect} | { 738 self.default_dialect or "" 739 } 740 741 analytics.collector.on_project_loaded( 742 project_type=self._project_type, 743 models_count=len(self._models), 744 audits_count=len(self._audits), 745 standalone_audits_count=len(self._standalone_audits), 746 macros_count=len(self._macros), 747 jinja_macros_count=len(self._jinja_macros.root_macros), 748 load_time_sec=time.perf_counter() - load_start_ts, 749 state_sync_fingerprint=self._scheduler.state_sync_fingerprint(self), 750 project_name=self.config.project, 751 ) 752 753 self._loaded = True 754 return self 755 756 @python_api_analytics 757 def run( 758 self, 759 environment: t.Optional[str] = None, 760 *, 761 start: t.Optional[TimeLike] = None, 762 end: t.Optional[TimeLike] = None, 763 execution_time: t.Optional[TimeLike] = None, 764 skip_janitor: bool = False, 765 ignore_cron: bool = False, 766 select_models: t.Optional[t.Collection[str]] = None, 767 exit_on_env_update: t.Optional[int] = None, 768 no_auto_upstream: bool = False, 769 ) -> CompletionStatus: 770 """Run the entire dag through the scheduler. 771 772 Args: 773 environment: The target environment to source model snapshots from and virtually update. Default: prod. 774 start: The start of the interval to render. 775 end: The end of the interval to render. 776 execution_time: The date/time time reference to use for execution time. Defaults to now. 777 skip_janitor: Whether to skip the janitor task. 778 ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals. 779 select_models: A list of model selection expressions to filter models that should run. Note that 780 upstream dependencies of selected models will also be evaluated. 781 exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update 782 to the target environment. 783 no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`. 784 785 Returns: 786 True if the run was successful, False otherwise. 787 """ 788 environment = environment or self.config.default_target_environment 789 environment = Environment.sanitize_name(environment) 790 if not skip_janitor and environment.lower() == c.PROD: 791 self._run_janitor() 792 793 self.notification_target_manager.notify( 794 NotificationEvent.RUN_START, environment=environment 795 ) 796 analytics_run_id = analytics.collector.on_run_start( 797 engine_type=self.snapshot_evaluator.adapter.dialect, 798 state_sync_type=self.state_sync.state_type(), 799 ) 800 self._load_materializations() 801 802 env_check_attempts_num = max( 803 1, 804 self.config.run.environment_check_max_wait 805 // self.config.run.environment_check_interval, 806 ) 807 808 def _block_until_finalized() -> str: 809 for _ in range(env_check_attempts_num): 810 assert environment is not None # mypy 811 environment_state = self.state_sync.get_environment(environment) 812 if not environment_state: 813 raise SQLMeshError(f"Environment '{environment}' was not found.") 814 if environment_state.finalized_ts: 815 return environment_state.plan_id 816 self.console.log_warning( 817 f"Environment '{environment}' is being updated by plan '{environment_state.plan_id}'. " 818 f"Retrying in {self.config.run.environment_check_interval} seconds..." 819 ) 820 time.sleep(self.config.run.environment_check_interval) 821 raise SQLMeshError( 822 f"Exceeded the maximum wait time for environment '{environment}' to be ready. " 823 "This means that the environment either failed to update or the update is taking longer than expected. " 824 "See https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#run to adjust the timeout settings." 825 ) 826 827 success = False 828 interrupted = False 829 done = False 830 while not done: 831 plan_id_at_start = _block_until_finalized() 832 833 def _has_environment_changed() -> bool: 834 assert environment is not None # mypy 835 current_environment_state = self.state_sync.get_environment(environment) 836 return ( 837 not current_environment_state 838 or current_environment_state.plan_id != plan_id_at_start 839 or not current_environment_state.finalized_ts 840 ) 841 842 try: 843 completion_status = self._run( 844 environment, 845 start=start, 846 end=end, 847 execution_time=execution_time, 848 ignore_cron=ignore_cron, 849 select_models=select_models, 850 circuit_breaker=_has_environment_changed, 851 no_auto_upstream=no_auto_upstream, 852 ) 853 done = True 854 except CircuitBreakerError: 855 self.console.log_warning( 856 f"Environment '{environment}' modified while running. Restarting the run..." 857 ) 858 if exit_on_env_update: 859 interrupted = True 860 done = True 861 except Exception as e: 862 self.notification_target_manager.notify( 863 NotificationEvent.RUN_FAILURE, traceback.format_exc() 864 ) 865 logger.info("Run failed.", exc_info=e) 866 analytics.collector.on_run_end( 867 run_id=analytics_run_id, succeeded=False, interrupted=False, error=e 868 ) 869 raise e 870 871 if completion_status.is_success or interrupted: 872 self.notification_target_manager.notify( 873 NotificationEvent.RUN_END, environment=environment 874 ) 875 self.console.log_success(f"Run finished for environment '{environment}'") 876 elif completion_status.is_failure: 877 self.notification_target_manager.notify( 878 NotificationEvent.RUN_FAILURE, "See console logs for details." 879 ) 880 881 analytics.collector.on_run_end( 882 run_id=analytics_run_id, succeeded=success, interrupted=interrupted 883 ) 884 885 if interrupted and exit_on_env_update is not None: 886 sys.exit(exit_on_env_update) 887 888 return completion_status 889 890 @python_api_analytics 891 def run_janitor(self, ignore_ttl: bool) -> bool: 892 success = False 893 894 if self.console.start_cleanup(ignore_ttl): 895 try: 896 self._run_janitor(ignore_ttl) 897 success = True 898 finally: 899 self.console.stop_cleanup(success=success) 900 901 return success 902 903 @python_api_analytics 904 def destroy(self) -> bool: 905 success = False 906 907 # Collect resources to be deleted 908 environments = self.state_reader.get_environments() 909 schemas_to_delete = set() 910 tables_to_delete = set() 911 views_to_delete = set() 912 all_snapshot_infos = set() 913 914 # For each environment find schemas and tables 915 for environment in environments: 916 all_snapshot_infos.update(environment.snapshots) 917 snapshots = self.state_reader.get_snapshots(environment.snapshots).values() 918 for snapshot in snapshots: 919 if snapshot.is_model and not snapshot.is_symbolic: 920 # Get the appropriate adapter 921 if environment.gateway_managed and snapshot.model_gateway: 922 adapter = self.engine_adapters.get( 923 snapshot.model_gateway, self.engine_adapter 924 ) 925 else: 926 adapter = self.engine_adapter 927 928 if environment.suffix_target.is_schema or environment.suffix_target.is_catalog: 929 schema = snapshot.qualified_view_name.schema_for_environment( 930 environment.naming_info, dialect=adapter.dialect 931 ) 932 catalog = snapshot.qualified_view_name.catalog_for_environment( 933 environment.naming_info, dialect=adapter.dialect 934 ) 935 if catalog: 936 schemas_to_delete.add(f"{catalog}.{schema}") 937 else: 938 schemas_to_delete.add(schema) 939 940 if environment.suffix_target.is_table: 941 view_name = snapshot.qualified_view_name.for_environment( 942 environment.naming_info, dialect=adapter.dialect 943 ) 944 views_to_delete.add(view_name) 945 946 # Add snapshot tables 947 table_name = snapshot.table_name() 948 tables_to_delete.add(table_name) 949 950 if self.console.start_destroy(schemas_to_delete, views_to_delete, tables_to_delete): 951 try: 952 success = self._destroy() 953 finally: 954 self.console.stop_destroy(success=success) 955 956 return success 957 958 @t.overload 959 def get_model( 960 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: Literal[True] = True 961 ) -> Model: ... 962 963 @t.overload 964 def get_model( 965 self, 966 model_or_snapshot: ModelOrSnapshot, 967 raise_if_missing: Literal[False] = False, 968 ) -> t.Optional[Model]: ... 969 970 def get_model( 971 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: bool = False 972 ) -> t.Optional[Model]: 973 """Returns a model with the given name or None if a model with such name doesn't exist. 974 975 Args: 976 model_or_snapshot: A model name, model, or snapshot. 977 raise_if_missing: Raises an error if a model is not found. 978 979 Returns: 980 The expected model. 981 """ 982 if isinstance(model_or_snapshot, Snapshot): 983 return model_or_snapshot.model 984 if not isinstance(model_or_snapshot, str): 985 return model_or_snapshot 986 987 try: 988 # We should try all dialects referenced in the project for cases when models use mixed dialects. 989 for dialect in self._all_dialects: 990 normalized_name = normalize_model_name( 991 model_or_snapshot, 992 dialect=dialect, 993 default_catalog=self.default_catalog, 994 ) 995 if normalized_name in self._models: 996 return self._models[normalized_name] 997 except: 998 pass 999 1000 if raise_if_missing: 1001 if model_or_snapshot.endswith((".sql", ".py")): 1002 msg = "Resolving models by path is not supported, please pass in the model name instead." 1003 else: 1004 msg = f"Cannot find model with name '{model_or_snapshot}'" 1005 1006 raise SQLMeshError(msg) 1007 1008 return None 1009 1010 @t.overload 1011 def get_snapshot(self, node_or_snapshot: NodeOrSnapshot) -> t.Optional[Snapshot]: ... 1012 1013 @t.overload 1014 def get_snapshot( 1015 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: Literal[True] 1016 ) -> Snapshot: ... 1017 1018 @t.overload 1019 def get_snapshot( 1020 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: Literal[False] 1021 ) -> t.Optional[Snapshot]: ... 1022 1023 def get_snapshot( 1024 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: bool = False 1025 ) -> t.Optional[Snapshot]: 1026 """Returns a snapshot with the given name or None if a snapshot with such name doesn't exist. 1027 1028 Args: 1029 node_or_snapshot: A node name, node, or snapshot. 1030 raise_if_missing: Raises an error if a snapshot is not found. 1031 1032 Returns: 1033 The expected snapshot. 1034 """ 1035 if isinstance(node_or_snapshot, Snapshot): 1036 return node_or_snapshot 1037 fqn = self._node_or_snapshot_to_fqn(node_or_snapshot) 1038 snapshot = self.snapshots.get(fqn) 1039 1040 if raise_if_missing and not snapshot: 1041 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1042 1043 return snapshot 1044 1045 def config_for_path(self, path: Path) -> t.Tuple[Config, Path]: 1046 """Returns the config and path of the said project for a given file path.""" 1047 for config_path, config in self.configs.items(): 1048 try: 1049 path.relative_to(config_path) 1050 return config, config_path 1051 except ValueError: 1052 pass 1053 return self.config, self.path 1054 1055 def config_for_node(self, node: Model | Audit) -> Config: 1056 path = node._path 1057 if path is None: 1058 return self.config 1059 return self.config_for_path(path)[0] # type: ignore 1060 1061 @property 1062 def models(self) -> MappingProxyType[str, Model]: 1063 """Returns all registered models in this context.""" 1064 return MappingProxyType(self._models) 1065 1066 @property 1067 def metrics(self) -> MappingProxyType[str, Metric]: 1068 """Returns all registered metrics in this context.""" 1069 return MappingProxyType(self._metrics) 1070 1071 @property 1072 def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]: 1073 """Returns all registered standalone audits in this context.""" 1074 return MappingProxyType(self._standalone_audits) 1075 1076 @property 1077 def models_with_tests(self) -> t.Set[str]: 1078 """Returns all models with tests in this context.""" 1079 return self._models_with_tests 1080 1081 @property 1082 def snapshots(self) -> t.Dict[str, Snapshot]: 1083 """Generates and returns snapshots based on models registered in this context. 1084 1085 If one of the snapshots has been previously stored in the persisted state, the stored 1086 instance will be returned. 1087 """ 1088 return self._snapshots() 1089 1090 @property 1091 def requirements(self) -> t.Dict[str, str]: 1092 """Returns the Python dependencies of the project loaded in this context.""" 1093 return self._requirements.copy() 1094 1095 @cached_property 1096 def default_catalog(self) -> t.Optional[str]: 1097 return self.default_catalog_per_gateway.get(self.selected_gateway) 1098 1099 @python_api_analytics 1100 def render( 1101 self, 1102 model_or_snapshot: ModelOrSnapshot, 1103 *, 1104 start: t.Optional[TimeLike] = None, 1105 end: t.Optional[TimeLike] = None, 1106 execution_time: t.Optional[TimeLike] = None, 1107 expand: t.Union[bool, t.Iterable[str]] = False, 1108 **kwargs: t.Any, 1109 ) -> exp.Expr: 1110 """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. 1111 1112 Args: 1113 model_or_snapshot: The model, model name, or snapshot to render. 1114 start: The start of the interval to render. 1115 end: The end of the interval to render. 1116 execution_time: The date/time time reference to use for execution time. Defaults to now. 1117 expand: Whether or not to use expand materialized models, defaults to False. 1118 If True, all referenced models are expanded as raw queries. 1119 If a list, only referenced models are expanded as raw queries. 1120 1121 Returns: 1122 The rendered expression. 1123 """ 1124 execution_time = execution_time or now() 1125 1126 model = self.get_model(model_or_snapshot, raise_if_missing=True) 1127 1128 if expand and not isinstance(expand, bool): 1129 expand = { 1130 normalize_model_name( 1131 x, default_catalog=self.default_catalog, dialect=self.default_dialect 1132 ) 1133 for x in expand 1134 } 1135 1136 expand = self.dag.upstream(model.fqn) if expand is True else expand or [] 1137 1138 if model.is_seed: 1139 import pandas as pd 1140 1141 df = next( 1142 model.render( 1143 context=self.execution_context( 1144 engine_adapter=self._get_engine_adapter(model.gateway) 1145 ), 1146 start=start, 1147 end=end, 1148 execution_time=execution_time, 1149 **kwargs, 1150 ) 1151 ) 1152 return next(pandas_to_sql(t.cast(pd.DataFrame, df), model.columns_to_types)) 1153 1154 snapshots = self.snapshots 1155 deployability_index = DeployabilityIndex.create(snapshots.values(), start=start) 1156 1157 return model.render_query_or_raise( 1158 start=start, 1159 end=end, 1160 execution_time=execution_time, 1161 snapshots=snapshots, 1162 expand=expand, 1163 deployability_index=deployability_index, 1164 engine_adapter=self._get_engine_adapter(model.gateway), 1165 **kwargs, 1166 ) 1167 1168 @python_api_analytics 1169 def evaluate( 1170 self, 1171 model_or_snapshot: ModelOrSnapshot, 1172 start: TimeLike, 1173 end: TimeLike, 1174 execution_time: TimeLike, 1175 limit: t.Optional[int] = None, 1176 **kwargs: t.Any, 1177 ) -> DF: 1178 """Evaluate a model or snapshot (running its query against a DB/Engine). 1179 1180 This method is used to test or iterate on models without side effects. 1181 1182 Args: 1183 model_or_snapshot: The model, model name, or snapshot to render. 1184 start: The start of the interval to evaluate. 1185 end: The end of the interval to evaluate. 1186 execution_time: The date/time time reference to use for execution time. 1187 limit: A limit applied to the model. 1188 """ 1189 snapshots = self.snapshots 1190 fqn = self._node_or_snapshot_to_fqn(model_or_snapshot) 1191 if fqn not in snapshots: 1192 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1193 snapshot = snapshots[fqn] 1194 1195 # Expand all uncategorized parents since physical tables don't exist for them yet 1196 expand = [ 1197 parent 1198 for parent in self.dag.upstream(snapshot.model.fqn) 1199 if (parent_snapshot := snapshots.get(parent)) 1200 and parent_snapshot.is_model 1201 and parent_snapshot.model.is_sql 1202 and not parent_snapshot.categorized 1203 ] 1204 1205 df = self.snapshot_evaluator.evaluate_and_fetch( 1206 snapshot, 1207 start=start, 1208 end=end, 1209 execution_time=execution_time, 1210 snapshots=self.snapshots, 1211 limit=limit or c.DEFAULT_MAX_LIMIT, 1212 expand=expand, 1213 ) 1214 1215 if df is None: 1216 raise RuntimeError(f"Error evaluating {snapshot.name}") 1217 1218 return df 1219 1220 @python_api_analytics 1221 def format( 1222 self, 1223 transpile: t.Optional[str] = None, 1224 rewrite_casts: t.Optional[bool] = None, 1225 append_newline: t.Optional[bool] = None, 1226 *, 1227 check: t.Optional[bool] = None, 1228 paths: t.Optional[t.Tuple[t.Union[str, Path], ...]] = None, 1229 **kwargs: t.Any, 1230 ) -> bool: 1231 """Format all SQL models and audits.""" 1232 filtered_targets = [ 1233 target 1234 for target in chain(self._models.values(), self._audits.values()) 1235 if target._path is not None 1236 and target._path.suffix == ".sql" 1237 and (not paths or any(target._path.samefile(p) for p in paths)) 1238 ] 1239 unformatted_file_paths = [] 1240 1241 for target in filtered_targets: 1242 if ( 1243 target._path is None or target.formatting is False 1244 ): # introduced to satisfy type checker as still want to pull filter out as many targets as possible before loop 1245 continue 1246 1247 with open(target._path, "r+", encoding="utf-8") as file: 1248 before = file.read() 1249 1250 after = self._format( 1251 target, 1252 before, 1253 transpile=transpile, 1254 rewrite_casts=rewrite_casts, 1255 append_newline=append_newline, 1256 **kwargs, 1257 ) 1258 1259 if not check: 1260 file.seek(0) 1261 file.write(after) 1262 file.truncate() 1263 elif before != after: 1264 unformatted_file_paths.append(target._path) 1265 1266 if unformatted_file_paths: 1267 for path in unformatted_file_paths: 1268 self.console.log_status_update(f"{path} needs reformatting.") 1269 self.console.log_status_update( 1270 f"\n{len(unformatted_file_paths)} file(s) need reformatting." 1271 ) 1272 return False 1273 1274 return True 1275 1276 def _format( 1277 self, 1278 target: Model | Audit, 1279 before: str, 1280 *, 1281 transpile: t.Optional[str] = None, 1282 rewrite_casts: t.Optional[bool] = None, 1283 append_newline: t.Optional[bool] = None, 1284 **kwargs: t.Any, 1285 ) -> str: 1286 expressions = parse(before, default_dialect=self.config_for_node(target).dialect) 1287 if transpile and is_meta_expression(expressions[0]): 1288 for prop in expressions[0].expressions: 1289 if prop.name.lower() == "dialect": 1290 prop.replace( 1291 exp.Property( 1292 this="dialect", 1293 value=exp.Literal.string(transpile or target.dialect), 1294 ) 1295 ) 1296 1297 format_config = self.config_for_node(target).format 1298 after = format_model_expressions( 1299 expressions, 1300 transpile or target.dialect, 1301 rewrite_casts=( 1302 rewrite_casts if rewrite_casts is not None else not format_config.no_rewrite_casts 1303 ), 1304 **{**format_config.generator_options, **kwargs}, 1305 ) 1306 1307 if append_newline is None: 1308 append_newline = format_config.append_newline 1309 if append_newline: 1310 after += "\n" 1311 1312 return after 1313 1314 @python_api_analytics 1315 def plan( 1316 self, 1317 environment: t.Optional[str] = None, 1318 *, 1319 start: t.Optional[TimeLike] = None, 1320 end: t.Optional[TimeLike] = None, 1321 execution_time: t.Optional[TimeLike] = None, 1322 create_from: t.Optional[str] = None, 1323 skip_tests: t.Optional[bool] = None, 1324 restate_models: t.Optional[t.Iterable[str]] = None, 1325 no_gaps: t.Optional[bool] = None, 1326 skip_backfill: t.Optional[bool] = None, 1327 empty_backfill: t.Optional[bool] = None, 1328 forward_only: t.Optional[bool] = None, 1329 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1330 allow_additive_models: t.Optional[t.Collection[str]] = None, 1331 no_prompts: t.Optional[bool] = None, 1332 auto_apply: t.Optional[bool] = None, 1333 no_auto_categorization: t.Optional[bool] = None, 1334 effective_from: t.Optional[TimeLike] = None, 1335 include_unmodified: t.Optional[bool] = None, 1336 select_models: t.Optional[t.Collection[str]] = None, 1337 backfill_models: t.Optional[t.Collection[str]] = None, 1338 categorizer_config: t.Optional[CategorizerConfig] = None, 1339 enable_preview: t.Optional[bool] = None, 1340 no_diff: t.Optional[bool] = None, 1341 run: t.Optional[bool] = None, 1342 diff_rendered: t.Optional[bool] = None, 1343 skip_linter: t.Optional[bool] = None, 1344 explain: t.Optional[bool] = None, 1345 ignore_cron: t.Optional[bool] = None, 1346 min_intervals: t.Optional[int] = None, 1347 ) -> Plan: 1348 """Interactively creates a plan. 1349 1350 This method compares the current context with the target environment. It then presents 1351 the differences and asks whether to backfill each modified model. 1352 1353 Args: 1354 environment: The environment to diff and plan against. 1355 start: The start date of the backfill if there is one. 1356 end: The end date of the backfill if there is one. 1357 execution_time: The date/time reference to use for execution time. Defaults to now. 1358 create_from: The environment to create the target environment from if it 1359 doesn't exist. If not specified, the "prod" environment will be used. 1360 skip_tests: Unit tests are run by default so this will skip them if enabled 1361 restate_models: A list of either internal or external models, or tags, that need to be restated 1362 for the given plan interval. If the target environment is a production environment, 1363 ALL snapshots that depended on these upstream tables will have their intervals deleted 1364 (even ones not in this current environment). Only the snapshots in this environment will 1365 be backfilled whereas others need to be recovered on a future plan application. For development 1366 environments only snapshots that are part of this plan will be affected. 1367 no_gaps: Whether to ensure that new snapshots for models that are already a 1368 part of the target environment have no data gaps when compared against previous 1369 snapshots for same models. 1370 skip_backfill: Whether to skip the backfill step. Default: False. 1371 empty_backfill: Like skip_backfill, but also records processed intervals. 1372 forward_only: Whether the purpose of the plan is to make forward only changes. 1373 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1374 allow_additive_models: Models whose forward-only changes are allowed to be additive. 1375 no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that 1376 if this flag is set to true and there are uncategorized changes the plan creation will 1377 fail. Default: False. 1378 auto_apply: Whether to automatically apply the new plan after creation. Default: False. 1379 no_auto_categorization: Indicates whether to disable automatic categorization of model 1380 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1381 option determines the behavior. 1382 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1383 project config by default. 1384 effective_from: The effective date from which to apply forward-only changes on production. 1385 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1386 select_models: A list of model selection strings to filter the models that should be included into this plan. 1387 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1388 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1389 no_diff: Hide text differences for changed models. 1390 run: Whether to run latest intervals as part of the plan application. 1391 diff_rendered: Whether the diff should compare raw vs rendered models 1392 skip_linter: Linter runs by default so this will skip it if enabled 1393 explain: Whether to explain the plan instead of applying it. 1394 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1395 on every model when checking for missing intervals 1396 1397 Returns: 1398 The populated Plan object. 1399 """ 1400 plan_builder = self.plan_builder( 1401 environment, 1402 start=start, 1403 end=end, 1404 execution_time=execution_time, 1405 create_from=create_from, 1406 skip_tests=skip_tests, 1407 restate_models=restate_models, 1408 no_gaps=no_gaps, 1409 skip_backfill=skip_backfill, 1410 empty_backfill=empty_backfill, 1411 forward_only=forward_only, 1412 allow_destructive_models=allow_destructive_models, 1413 allow_additive_models=allow_additive_models, 1414 no_auto_categorization=no_auto_categorization, 1415 effective_from=effective_from, 1416 include_unmodified=include_unmodified, 1417 select_models=select_models, 1418 backfill_models=backfill_models, 1419 categorizer_config=categorizer_config, 1420 enable_preview=enable_preview, 1421 run=run, 1422 diff_rendered=diff_rendered, 1423 skip_linter=skip_linter, 1424 explain=explain, 1425 ignore_cron=ignore_cron, 1426 min_intervals=min_intervals, 1427 ) 1428 1429 plan = plan_builder.build() 1430 1431 if no_auto_categorization or plan.uncategorized: 1432 # Prompts are required if the auto categorization is disabled 1433 # or if there are any uncategorized snapshots in the plan 1434 no_prompts = False 1435 1436 if explain: 1437 auto_apply = True 1438 1439 self.console.plan( 1440 plan_builder, 1441 auto_apply if auto_apply is not None else self.config.plan.auto_apply, 1442 self.default_catalog, 1443 no_diff=no_diff if no_diff is not None else self.config.plan.no_diff, 1444 no_prompts=no_prompts if no_prompts is not None else self.config.plan.no_prompts, 1445 ) 1446 1447 return plan 1448 1449 @python_api_analytics 1450 def plan_builder( 1451 self, 1452 environment: t.Optional[str] = None, 1453 *, 1454 start: t.Optional[TimeLike] = None, 1455 end: t.Optional[TimeLike] = None, 1456 execution_time: t.Optional[TimeLike] = None, 1457 create_from: t.Optional[str] = None, 1458 skip_tests: t.Optional[bool] = None, 1459 restate_models: t.Optional[t.Iterable[str]] = None, 1460 no_gaps: t.Optional[bool] = None, 1461 skip_backfill: t.Optional[bool] = None, 1462 empty_backfill: t.Optional[bool] = None, 1463 forward_only: t.Optional[bool] = None, 1464 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1465 allow_additive_models: t.Optional[t.Collection[str]] = None, 1466 no_auto_categorization: t.Optional[bool] = None, 1467 effective_from: t.Optional[TimeLike] = None, 1468 include_unmodified: t.Optional[bool] = None, 1469 select_models: t.Optional[t.Collection[str]] = None, 1470 backfill_models: t.Optional[t.Collection[str]] = None, 1471 categorizer_config: t.Optional[CategorizerConfig] = None, 1472 enable_preview: t.Optional[bool] = None, 1473 run: t.Optional[bool] = None, 1474 diff_rendered: t.Optional[bool] = None, 1475 skip_linter: t.Optional[bool] = None, 1476 explain: t.Optional[bool] = None, 1477 ignore_cron: t.Optional[bool] = None, 1478 min_intervals: t.Optional[int] = None, 1479 always_include_local_changes: t.Optional[bool] = None, 1480 ) -> PlanBuilder: 1481 """Creates a plan builder. 1482 1483 Args: 1484 environment: The environment to diff and plan against. 1485 start: The start date of the backfill if there is one. 1486 end: The end date of the backfill if there is one. 1487 execution_time: The date/time reference to use for execution time. Defaults to now. 1488 create_from: The environment to create the target environment from if it 1489 doesn't exist. If not specified, the "prod" environment will be used. 1490 skip_tests: Unit tests are run by default so this will skip them if enabled 1491 restate_models: A list of either internal or external models, or tags, that need to be restated 1492 for the given plan interval. If the target environment is a production environment, 1493 ALL snapshots that depended on these upstream tables will have their intervals deleted 1494 (even ones not in this current environment). Only the snapshots in this environment will 1495 be backfilled whereas others need to be recovered on a future plan application. For development 1496 environments only snapshots that are part of this plan will be affected. 1497 no_gaps: Whether to ensure that new snapshots for models that are already a 1498 part of the target environment have no data gaps when compared against previous 1499 snapshots for same models. 1500 skip_backfill: Whether to skip the backfill step. Default: False. 1501 empty_backfill: Like skip_backfill, but also records processed intervals. 1502 forward_only: Whether the purpose of the plan is to make forward only changes. 1503 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1504 no_auto_categorization: Indicates whether to disable automatic categorization of model 1505 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1506 option determines the behavior. 1507 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1508 project config by default. 1509 effective_from: The effective date from which to apply forward-only changes on production. 1510 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1511 select_models: A list of model selection strings to filter the models that should be included into this plan. 1512 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1513 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1514 run: Whether to run latest intervals as part of the plan application. 1515 diff_rendered: Whether the diff should compare raw vs rendered models 1516 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1517 on every model when checking for missing intervals 1518 always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. 1519 However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour. 1520 1521 Returns: 1522 The plan builder. 1523 """ 1524 kwargs: t.Dict[str, t.Optional[UserProvidedFlags]] = { 1525 "start": start, 1526 "end": end, 1527 "execution_time": execution_time, 1528 "create_from": create_from, 1529 "skip_tests": skip_tests, 1530 "restate_models": list(restate_models) if restate_models is not None else None, 1531 "no_gaps": no_gaps, 1532 "skip_backfill": skip_backfill, 1533 "empty_backfill": empty_backfill, 1534 "forward_only": forward_only, 1535 "allow_destructive_models": list(allow_destructive_models) 1536 if allow_destructive_models is not None 1537 else None, 1538 "allow_additive_models": list(allow_additive_models) 1539 if allow_additive_models is not None 1540 else None, 1541 "no_auto_categorization": no_auto_categorization, 1542 "effective_from": effective_from, 1543 "include_unmodified": include_unmodified, 1544 "select_models": list(select_models) if select_models is not None else None, 1545 "backfill_models": list(backfill_models) if backfill_models is not None else None, 1546 "enable_preview": enable_preview, 1547 "run": run, 1548 "diff_rendered": diff_rendered, 1549 "skip_linter": skip_linter, 1550 "min_intervals": min_intervals, 1551 } 1552 user_provided_flags: t.Dict[str, UserProvidedFlags] = { 1553 k: v for k, v in kwargs.items() if v is not None 1554 } 1555 1556 skip_tests = explain or skip_tests or False 1557 no_gaps = no_gaps or False 1558 skip_backfill = skip_backfill or False 1559 empty_backfill = empty_backfill or False 1560 run = run or False 1561 diff_rendered = diff_rendered or False 1562 skip_linter = skip_linter or False 1563 min_intervals = min_intervals or 0 1564 1565 environment = environment or self.config.default_target_environment 1566 environment = Environment.sanitize_name(environment) 1567 is_dev = environment != c.PROD 1568 1569 if include_unmodified is None: 1570 include_unmodified = self.config.plan.include_unmodified 1571 1572 if skip_backfill and not no_gaps and not is_dev: 1573 # note: we deliberately don't mention the --no-gaps flag in case the plan came from the sqlmesh_dbt command 1574 # todo: perhaps we could have better error messages if we check sys.argv[0] for which cli is running? 1575 self.console.log_warning( 1576 "Skipping the backfill stage for production can lead to unexpected results, such as tables being empty or incremental data with non-contiguous time ranges being made available.\n" 1577 "If you are doing this deliberately to create an empty version of a table to test a change, please consider using Virtual Data Environments instead." 1578 ) 1579 1580 if not skip_linter: 1581 self.lint_models() 1582 1583 self._run_plan_tests(skip_tests=skip_tests) 1584 1585 environment_ttl = ( 1586 self.environment_ttl if environment not in self.pinned_environments else None 1587 ) 1588 1589 model_selector = self._new_selector() 1590 1591 if allow_destructive_models: 1592 expanded_destructive_models = model_selector.expand_model_selections( 1593 allow_destructive_models 1594 ) 1595 else: 1596 expanded_destructive_models = None 1597 1598 if allow_additive_models: 1599 expanded_additive_models = model_selector.expand_model_selections(allow_additive_models) 1600 else: 1601 expanded_additive_models = None 1602 1603 if backfill_models: 1604 backfill_models = model_selector.expand_model_selections(backfill_models) 1605 else: 1606 backfill_models = None 1607 1608 models_override: t.Optional[UniqueKeyDict[str, Model]] = None 1609 if select_models: 1610 try: 1611 models_override = model_selector.select_models( 1612 select_models, 1613 environment, 1614 fallback_env_name=create_from or c.PROD, 1615 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1616 ) 1617 except SQLMeshError as e: 1618 logger.exception(e) # ensure the full stack trace is logged 1619 raise PlanError( 1620 f"{e}\nCheck the SQLMesh log file for the full stack trace.\nIf the model has been fixed locally, please ensure that the --select-model expression includes it." 1621 ) 1622 if not backfill_models: 1623 # Only backfill selected models unless explicitly specified. 1624 backfill_models = model_selector.expand_model_selections(select_models) 1625 1626 expanded_restate_models = None 1627 if restate_models is not None: 1628 expanded_restate_models = model_selector.expand_model_selections(restate_models) 1629 1630 if (restate_models is not None and not expanded_restate_models) or ( 1631 backfill_models is not None and not backfill_models 1632 ): 1633 raise PlanError( 1634 "Selector did not return any models. Please check your model selection and try again." 1635 ) 1636 1637 if always_include_local_changes is None: 1638 # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes 1639 force_no_diff = restate_models is not None or ( 1640 backfill_models is not None and not backfill_models 1641 ) 1642 else: 1643 force_no_diff = not always_include_local_changes 1644 1645 snapshots = self._snapshots(models_override) 1646 context_diff = self._context_diff( 1647 environment or c.PROD, 1648 snapshots=snapshots, 1649 create_from=create_from, 1650 force_no_diff=force_no_diff, 1651 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1652 diff_rendered=diff_rendered, 1653 always_recreate_environment=self.config.plan.always_recreate_environment, 1654 ) 1655 modified_model_names = { 1656 *context_diff.modified_snapshots, 1657 *[s.name for s in context_diff.added], 1658 } 1659 1660 if ( 1661 is_dev 1662 and not include_unmodified 1663 and backfill_models is None 1664 and expanded_restate_models is None 1665 ): 1666 # Only backfill modified and added models. 1667 # This ensures that no models outside the impacted sub-DAG(s) will be backfilled unexpectedly. 1668 backfill_models = modified_model_names or None 1669 1670 max_interval_end_per_model = None 1671 default_start, default_end = None, None 1672 if not run: 1673 ignore_cron = False 1674 max_interval_end_per_model = self._get_max_interval_end_per_model( 1675 snapshots, backfill_models 1676 ) 1677 # If no end date is specified, use the max interval end from prod 1678 # to prevent unintended evaluation of the entire DAG. 1679 default_start, default_end = self._get_plan_default_start_end( 1680 snapshots, 1681 max_interval_end_per_model, 1682 backfill_models, 1683 modified_model_names, 1684 execution_time or now(), 1685 ) 1686 1687 # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model. 1688 self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values()) 1689 1690 start_override_per_model = self._calculate_start_override_per_model( 1691 min_intervals, 1692 start or default_start, 1693 end or default_end, 1694 execution_time or now(), 1695 backfill_models, 1696 snapshots, 1697 max_interval_end_per_model, 1698 ) 1699 1700 if not self.config.virtual_environment_mode.is_full: 1701 forward_only = True 1702 elif forward_only is None: 1703 forward_only = self.config.plan.forward_only 1704 1705 # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments 1706 # If we are not, then there is no point, because none of the data in dev environments can be promoted by definition 1707 restate_all_snapshots = ( 1708 expanded_restate_models is not None 1709 and not is_dev 1710 and self.config.virtual_environment_mode.is_full 1711 ) 1712 1713 return self.PLAN_BUILDER_TYPE( 1714 context_diff=context_diff, 1715 start=start, 1716 end=end, 1717 execution_time=execution_time, 1718 apply=self.apply, 1719 restate_models=expanded_restate_models, 1720 restate_all_snapshots=restate_all_snapshots, 1721 backfill_models=backfill_models, 1722 no_gaps=no_gaps, 1723 skip_backfill=skip_backfill, 1724 empty_backfill=empty_backfill, 1725 is_dev=is_dev, 1726 forward_only=forward_only, 1727 allow_destructive_models=expanded_destructive_models, 1728 allow_additive_models=expanded_additive_models, 1729 environment_ttl=environment_ttl, 1730 environment_suffix_target=self.config.environment_suffix_target, 1731 environment_catalog_mapping=self.environment_catalog_mapping, 1732 categorizer_config=categorizer_config or self.auto_categorize_changes, 1733 auto_categorization_enabled=not no_auto_categorization, 1734 effective_from=effective_from, 1735 include_unmodified=include_unmodified, 1736 default_start=default_start, 1737 default_end=default_end, 1738 enable_preview=( 1739 enable_preview if enable_preview is not None else self._plan_preview_enabled 1740 ), 1741 end_bounded=not run, 1742 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1743 start_override_per_model=start_override_per_model, 1744 end_override_per_model=max_interval_end_per_model, 1745 console=self.console, 1746 user_provided_flags=user_provided_flags, 1747 selected_models={ 1748 dbt_unique_id 1749 for model in model_selector.expand_model_selections(select_models or "*") 1750 if (dbt_unique_id := snapshots[model].node.dbt_unique_id) 1751 }, 1752 explain=explain or False, 1753 ignore_cron=ignore_cron or False, 1754 ) 1755 1756 def apply( 1757 self, 1758 plan: Plan, 1759 circuit_breaker: t.Optional[t.Callable[[], bool]] = None, 1760 ) -> None: 1761 """Applies a plan by pushing snapshots and backfilling data. 1762 1763 Given a plan, it pushes snapshots into the state sync and then uses the scheduler 1764 to backfill all models. 1765 1766 Args: 1767 plan: The plan to apply. 1768 circuit_breaker: An optional handler which checks if the apply should be aborted. 1769 """ 1770 if ( 1771 not plan.context_diff.has_changes 1772 and not plan.requires_backfill 1773 and not plan.has_unmodified_unpromoted 1774 ): 1775 return 1776 if plan.uncategorized: 1777 raise UncategorizedPlanError("Can't apply a plan with uncategorized changes.") 1778 1779 if plan.explain: 1780 explainer = PlanExplainer( 1781 state_reader=self.state_reader, 1782 default_catalog=self.default_catalog, 1783 console=self.console, 1784 ) 1785 explainer.evaluate(plan.to_evaluatable()) 1786 return 1787 1788 self.notification_target_manager.notify( 1789 NotificationEvent.APPLY_START, 1790 environment=plan.environment_naming_info.name, 1791 plan_id=plan.plan_id, 1792 ) 1793 try: 1794 self._apply(plan, circuit_breaker) 1795 except Exception as e: 1796 self.notification_target_manager.notify( 1797 NotificationEvent.APPLY_FAILURE, 1798 environment=plan.environment_naming_info.name, 1799 plan_id=plan.plan_id, 1800 exc=traceback.format_exc(), 1801 ) 1802 logger.info("Plan application failed.", exc_info=e) 1803 raise e 1804 self.notification_target_manager.notify( 1805 NotificationEvent.APPLY_END, 1806 environment=plan.environment_naming_info.name, 1807 plan_id=plan.plan_id, 1808 ) 1809 1810 @python_api_analytics 1811 def invalidate_environment(self, name: str, sync: bool = False) -> None: 1812 """Invalidates the target environment by setting its expiration timestamp to now. 1813 1814 Args: 1815 name: The name of the environment to invalidate. 1816 sync: If True, the call blocks until the environment is deleted. Otherwise, the environment will 1817 be deleted asynchronously by the janitor process. 1818 """ 1819 name = Environment.sanitize_name(name) 1820 self.state_sync.invalidate_environment(name) 1821 if sync: 1822 self._cleanup_environments() 1823 self.console.log_success(f"Environment '{name}' deleted.") 1824 else: 1825 self.console.log_success(f"Environment '{name}' invalidated.") 1826 1827 @python_api_analytics 1828 def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> bool: 1829 """Show a diff of the current context with a given environment. 1830 1831 Args: 1832 environment: The environment to diff against. 1833 detailed: Show the actual SQL differences if True. 1834 1835 Returns: 1836 True if there are changes, False otherwise. 1837 """ 1838 environment = environment or self.config.default_target_environment 1839 environment = Environment.sanitize_name(environment) 1840 context_diff = self._context_diff(environment) 1841 self.console.show_environment_difference_summary( 1842 context_diff, 1843 no_diff=not detailed, 1844 ) 1845 if context_diff.has_changes: 1846 self.console.show_model_difference_summary( 1847 context_diff, 1848 EnvironmentNamingInfo.from_environment_catalog_mapping( 1849 self.environment_catalog_mapping, 1850 name=environment, 1851 suffix_target=self.config.environment_suffix_target, 1852 normalize_name=context_diff.normalize_environment_name, 1853 ), 1854 self.default_catalog, 1855 no_diff=not detailed, 1856 ) 1857 return context_diff.has_changes 1858 1859 @python_api_analytics 1860 def table_diff( 1861 self, 1862 source: str, 1863 target: str, 1864 on: t.Optional[t.List[str] | exp.Expr] = None, 1865 skip_columns: t.Optional[t.List[str]] = None, 1866 select_models: t.Optional[t.Collection[str]] = None, 1867 where: t.Optional[str | exp.Expr] = None, 1868 limit: int = 20, 1869 show: bool = True, 1870 show_sample: bool = True, 1871 decimals: int = 3, 1872 skip_grain_check: bool = False, 1873 warn_grain_check: bool = False, 1874 temp_schema: t.Optional[str] = None, 1875 schema_diff_ignore_case: bool = False, 1876 **kwargs: t.Any, # catch-all to prevent an 'unexpected keyword argument' error if an table_diff extension passes in some extra arguments 1877 ) -> t.List[TableDiff]: 1878 """Show a diff between two tables. 1879 1880 Args: 1881 source: The source environment or table. 1882 target: The target environment or table. 1883 on: The join condition, table aliases must be "s" and "t" for source and target. 1884 If omitted, the table's grain will be used. 1885 skip_columns: The columns to skip when computing the table diff. 1886 select_models: The models or snapshots to use when environments are passed in. 1887 where: An optional where statement to filter results. 1888 limit: The limit of the sample dataframe. 1889 show: Show the table diff output in the console. 1890 show_sample: Show the sample dataframe in the console. Requires show=True. 1891 decimals: The number of decimal places to keep when comparing floating point columns. 1892 skip_grain_check: Skip check for rows that contain null or duplicate grains. 1893 temp_schema: The schema to use for temporary tables. 1894 1895 Returns: 1896 The list of TableDiff objects containing schema and summary differences. 1897 """ 1898 1899 if "|" in source or "|" in target: 1900 raise ConfigError( 1901 "Cross-database table diffing is available in Tobiko Cloud. Read more here: " 1902 "https://sqlmesh.readthedocs.io/en/stable/guides/tablediff/#diffing-tables-or-views-across-gateways" 1903 ) 1904 1905 table_diffs: t.List[TableDiff] = [] 1906 1907 # Diffs multiple or a single model across two environments 1908 if select_models: 1909 source_env = self.state_reader.get_environment(source) 1910 target_env = self.state_reader.get_environment(target) 1911 if not source_env: 1912 raise SQLMeshError(f"Could not find environment '{source}'") 1913 if not target_env: 1914 raise SQLMeshError(f"Could not find environment '{target}'") 1915 criteria = ", ".join(f"'{c}'" for c in select_models) 1916 try: 1917 selected_models = self._new_selector().expand_model_selections(select_models) 1918 if not selected_models: 1919 self.console.log_status_update( 1920 f"No models matched the selection criteria: {criteria}" 1921 ) 1922 except Exception as e: 1923 raise SQLMeshError(e) 1924 1925 models_to_diff: t.List[ 1926 t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Expr]] 1927 ] = [] 1928 models_without_grain: t.List[Model] = [] 1929 source_snapshots_to_name = { 1930 snapshot.name: snapshot for snapshot in source_env.snapshots 1931 } 1932 target_snapshots_to_name = { 1933 snapshot.name: snapshot for snapshot in target_env.snapshots 1934 } 1935 1936 for model_fqn in selected_models: 1937 model = self._models[model_fqn] 1938 adapter = self._get_engine_adapter(model.gateway) 1939 source_snapshot = source_snapshots_to_name.get(model.fqn) 1940 target_snapshot = target_snapshots_to_name.get(model.fqn) 1941 1942 if target_snapshot and source_snapshot: 1943 if (source_snapshot.fingerprint != target_snapshot.fingerprint) and ( 1944 (source_snapshot.version != target_snapshot.version) 1945 or source_snapshot.is_forward_only 1946 ): 1947 # Compare the virtual layer instead of the physical layer because the virtual layer is guaranteed to point 1948 # to the correct/active snapshot for the model in the specified environment, taking into account things like dev previews 1949 source = source_snapshot.qualified_view_name.for_environment( 1950 source_env.naming_info, adapter.dialect 1951 ) 1952 target = target_snapshot.qualified_view_name.for_environment( 1953 target_env.naming_info, adapter.dialect 1954 ) 1955 model_on = on or model.on 1956 if not model_on: 1957 models_without_grain.append(model) 1958 else: 1959 models_to_diff.append((model, adapter, source, target, model_on)) 1960 1961 if models_without_grain: 1962 model_names = "\n".join( 1963 f"─ {model.name} \n at '{model._path}'" for model in models_without_grain 1964 ) 1965 message = ( 1966 "SQLMesh doesn't know how to join the tables for the following models:\n" 1967 f"{model_names}\n\n" 1968 "Please specify a `grain` in each model definition. It must be unique and not null." 1969 ) 1970 if warn_grain_check: 1971 self.console.log_warning(message) 1972 else: 1973 raise SQLMeshError(message) 1974 1975 if models_to_diff: 1976 self.console.show_table_diff_details( 1977 [model[0].name for model in models_to_diff], 1978 ) 1979 1980 self.console.start_table_diff_progress(len(models_to_diff)) 1981 try: 1982 tasks_num = min(len(models_to_diff), self.concurrent_tasks) 1983 table_diffs = concurrent_apply_to_values( 1984 list(models_to_diff), 1985 lambda model_info: self._model_diff( 1986 model=model_info[0], 1987 adapter=model_info[1], 1988 source=model_info[2], 1989 target=model_info[3], 1990 on=model_info[4], 1991 source_alias=source_env.name, 1992 target_alias=target_env.name, 1993 limit=limit, 1994 decimals=decimals, 1995 skip_columns=skip_columns, 1996 where=where, 1997 show=show, 1998 temp_schema=temp_schema, 1999 skip_grain_check=skip_grain_check, 2000 schema_diff_ignore_case=schema_diff_ignore_case, 2001 ), 2002 tasks_num=tasks_num, 2003 ) 2004 self.console.stop_table_diff_progress(success=True) 2005 except: 2006 self.console.stop_table_diff_progress(success=False) 2007 raise 2008 elif selected_models: 2009 self.console.log_status_update( 2010 f"No models contain differences with the selection criteria: {criteria}" 2011 ) 2012 2013 else: 2014 table_diffs = [ 2015 self._table_diff( 2016 source=source, 2017 target=target, 2018 source_alias=source, 2019 target_alias=target, 2020 limit=limit, 2021 decimals=decimals, 2022 adapter=self.engine_adapter, 2023 on=on, 2024 skip_columns=skip_columns, 2025 where=where, 2026 schema_diff_ignore_case=schema_diff_ignore_case, 2027 ) 2028 ] 2029 2030 if show: 2031 self.console.show_table_diff(table_diffs, show_sample, skip_grain_check, temp_schema) 2032 2033 return table_diffs 2034 2035 def _model_diff( 2036 self, 2037 model: Model, 2038 adapter: EngineAdapter, 2039 source: str, 2040 target: str, 2041 source_alias: str, 2042 target_alias: str, 2043 limit: int, 2044 decimals: int, 2045 on: t.Optional[t.List[str] | exp.Expr] = None, 2046 skip_columns: t.Optional[t.List[str]] = None, 2047 where: t.Optional[str | exp.Expr] = None, 2048 show: bool = True, 2049 temp_schema: t.Optional[str] = None, 2050 skip_grain_check: bool = False, 2051 schema_diff_ignore_case: bool = False, 2052 ) -> TableDiff: 2053 self.console.start_table_diff_model_progress(model.name) 2054 2055 table_diff = self._table_diff( 2056 on=on, 2057 skip_columns=skip_columns, 2058 where=where, 2059 limit=limit, 2060 decimals=decimals, 2061 model=model, 2062 adapter=adapter, 2063 source=source, 2064 target=target, 2065 source_alias=source_alias, 2066 target_alias=target_alias, 2067 schema_diff_ignore_case=schema_diff_ignore_case, 2068 ) 2069 2070 if show: 2071 # Trigger row_diff in parallel execution so it's available for ordered display later 2072 table_diff.row_diff(temp_schema=temp_schema, skip_grain_check=skip_grain_check) 2073 2074 self.console.update_table_diff_progress(model.name) 2075 2076 return table_diff 2077 2078 def _table_diff( 2079 self, 2080 source: str, 2081 target: str, 2082 source_alias: str, 2083 target_alias: str, 2084 limit: int, 2085 decimals: int, 2086 adapter: EngineAdapter, 2087 on: t.Optional[t.List[str] | exp.Expr] = None, 2088 model: t.Optional[Model] = None, 2089 skip_columns: t.Optional[t.List[str]] = None, 2090 where: t.Optional[str | exp.Expr] = None, 2091 schema_diff_ignore_case: bool = False, 2092 ) -> TableDiff: 2093 if not on: 2094 raise SQLMeshError( 2095 "SQLMesh doesn't know how to join the two tables. Specify the `grains` in each model definition or pass join column names in separate `-o` flags." 2096 ) 2097 2098 return TableDiff( 2099 adapter=adapter.with_settings(execute_log_level=logger.getEffectiveLevel()), 2100 source=source, 2101 target=target, 2102 on=on, 2103 skip_columns=skip_columns, 2104 where=where, 2105 source_alias=source_alias, 2106 target_alias=target_alias, 2107 limit=limit, 2108 decimals=decimals, 2109 model_name=model.name if model else None, 2110 model_dialect=model.dialect if model else None, 2111 schema_diff_ignore_case=schema_diff_ignore_case, 2112 ) 2113 2114 @python_api_analytics 2115 def get_dag( 2116 self, select_models: t.Optional[t.Collection[str]] = None, **options: t.Any 2117 ) -> GraphHTML: 2118 """Gets an HTML object representation of the DAG. 2119 2120 Args: 2121 select_models: A list of model selection strings that should be included in the dag. 2122 Returns: 2123 An html object that renders the dag. 2124 """ 2125 dag = ( 2126 self.dag.prune(*self._new_selector().expand_model_selections(select_models)) 2127 if select_models 2128 else self.dag 2129 ) 2130 2131 nodes = {} 2132 edges: t.List[t.Dict] = [] 2133 2134 for node, deps in dag.graph.items(): 2135 nodes[node] = { 2136 "id": node, 2137 "label": node.split(".")[-1], 2138 "title": f"<span>{node}</span>", 2139 } 2140 edges.extend({"from": d, "to": node} for d in deps) 2141 2142 return GraphHTML( 2143 nodes, 2144 edges, 2145 options={ 2146 "height": "100%", 2147 "width": "100%", 2148 "interaction": {}, 2149 "layout": { 2150 "hierarchical": { 2151 "enabled": True, 2152 "nodeSpacing": 200, 2153 "sortMethod": "directed", 2154 }, 2155 }, 2156 "nodes": { 2157 "shape": "box", 2158 }, 2159 **options, 2160 }, 2161 ) 2162 2163 @python_api_analytics 2164 def render_dag(self, path: str, select_models: t.Optional[t.Collection[str]] = None) -> None: 2165 """Render the dag as HTML and save it to a file. 2166 2167 Args: 2168 path: filename to save the dag html to 2169 select_models: A list of model selection strings that should be included in the dag. 2170 """ 2171 file_path = Path(path) 2172 suffix = file_path.suffix 2173 if suffix != ".html": 2174 if suffix: 2175 get_console().log_warning( 2176 f"The extension {suffix} does not designate an html file. A file with a `.html` extension will be created instead." 2177 ) 2178 path = str(file_path.with_suffix(".html")) 2179 2180 with open(path, "w", encoding="utf-8") as file: 2181 file.write(str(self.get_dag(select_models))) 2182 2183 @python_api_analytics 2184 def create_test( 2185 self, 2186 model: str, 2187 input_queries: t.Dict[str, str], 2188 overwrite: bool = False, 2189 variables: t.Optional[t.Dict[str, str]] = None, 2190 path: t.Optional[str] = None, 2191 name: t.Optional[str] = None, 2192 include_ctes: bool = False, 2193 ) -> None: 2194 """Generate a unit test fixture for a given model. 2195 2196 Args: 2197 model: The model to test. 2198 input_queries: Mapping of model names to queries. Each model included in this mapping 2199 will be populated in the test based on the results of the corresponding query. 2200 overwrite: Whether to overwrite the existing test in case of a file path collision. 2201 When set to False, an error will be raised if there is such a collision. 2202 variables: Key-value pairs that will define variables needed by the model. 2203 path: The file path corresponding to the fixture, relative to the test directory. 2204 By default, the fixture will be created under the test directory and the file name 2205 will be inferred from the test's name. 2206 name: The name of the test. This is inferred from the model name by default. 2207 include_ctes: When true, CTE fixtures will also be generated. 2208 """ 2209 input_queries = { 2210 # The get_model here has two purposes: return normalized names & check for missing deps 2211 self.get_model(dep, raise_if_missing=True).fqn: query 2212 for dep, query in input_queries.items() 2213 } 2214 2215 try: 2216 model_to_test = self.get_model(model, raise_if_missing=True) 2217 test_adapter = self.test_connection_config.create_engine_adapter( 2218 register_comments_override=False 2219 ) 2220 2221 generate_test( 2222 model=model_to_test, 2223 input_queries=input_queries, 2224 models=self._models, 2225 engine_adapter=self._get_engine_adapter(model_to_test.gateway), 2226 test_engine_adapter=test_adapter, 2227 project_path=self.path, 2228 overwrite=overwrite, 2229 variables=variables, 2230 path=path, 2231 name=name, 2232 include_ctes=include_ctes, 2233 ) 2234 finally: 2235 if test_adapter: 2236 test_adapter.close() 2237 2238 @python_api_analytics 2239 def test( 2240 self, 2241 match_patterns: t.Optional[t.List[str]] = None, 2242 tests: t.Optional[t.List[str]] = None, 2243 verbosity: Verbosity = Verbosity.DEFAULT, 2244 preserve_fixtures: bool = False, 2245 stream: t.Optional[t.TextIO] = None, 2246 ) -> ModelTextTestResult: 2247 """Discover and run model tests""" 2248 if verbosity >= Verbosity.VERBOSE: 2249 import pandas as pd 2250 2251 pd.set_option("display.max_columns", None) 2252 2253 test_meta = self.select_tests(tests=tests, patterns=match_patterns) 2254 2255 result = run_tests( 2256 model_test_metadata=test_meta, 2257 models=self._models, 2258 config=self.config, 2259 selected_gateway=self.selected_gateway, 2260 dialect=self.default_dialect, 2261 verbosity=verbosity, 2262 preserve_fixtures=preserve_fixtures, 2263 stream=stream, 2264 default_catalog=self.default_catalog, 2265 default_catalog_dialect=self.config.dialect or "", 2266 ) 2267 2268 self.console.log_test_results( 2269 result, 2270 self.test_connection_config._engine_adapter.DIALECT, 2271 ) 2272 2273 return result 2274 2275 @python_api_analytics 2276 def audit( 2277 self, 2278 start: TimeLike, 2279 end: TimeLike, 2280 *, 2281 models: t.Optional[t.Iterator[str]] = None, 2282 execution_time: t.Optional[TimeLike] = None, 2283 ) -> bool: 2284 """Audit models. 2285 2286 Args: 2287 start: The start of the interval to audit. 2288 end: The end of the interval to audit. 2289 models: The models to audit. All models will be audited if not specified. 2290 execution_time: The date/time time reference to use for execution time. Defaults to now. 2291 2292 Returns: 2293 False if any of the audits failed, True otherwise. 2294 """ 2295 2296 snapshots = ( 2297 [self.get_snapshot(model, raise_if_missing=True) for model in models] 2298 if models 2299 else self.snapshots.values() 2300 ) 2301 2302 num_audits = sum(len(snapshot.node.audits_with_args) for snapshot in snapshots) 2303 self.console.log_status_update(f"Found {num_audits} audit(s).") 2304 2305 errors = [] 2306 skipped_count = 0 2307 for snapshot in snapshots: 2308 for audit_result in self.snapshot_evaluator.audit( 2309 snapshot=snapshot, 2310 start=start, 2311 end=end, 2312 execution_time=execution_time, 2313 snapshots=self.snapshots, 2314 ): 2315 audit_id = f"{audit_result.audit.name}" 2316 if audit_result.model: 2317 audit_id += f" on model {audit_result.model.name}" 2318 2319 if audit_result.skipped: 2320 self.console.log_status_update(f"{audit_id} ⏸️ SKIPPED.") 2321 skipped_count += 1 2322 elif audit_result.count: 2323 errors.append(audit_result) 2324 self.console.log_status_update( 2325 f"{audit_id} ❌ [red]FAIL [{audit_result.count}][/red]." 2326 ) 2327 else: 2328 self.console.log_status_update(f"{audit_id} ✅ [green]PASS[/green].") 2329 2330 self.console.log_status_update( 2331 f"\nFinished with {len(errors)} audit error{'' if len(errors) == 1 else 's'} " 2332 f"and {skipped_count} audit{'' if skipped_count == 1 else 's'} skipped." 2333 ) 2334 for error in errors: 2335 self.console.log_status_update( 2336 f"\nFailure in audit {error.audit.name} ({error.audit._path})." 2337 ) 2338 self.console.log_status_update(f"Got {error.count} results, expected 0.") 2339 if error.query: 2340 self.console.show_sql( 2341 f"{error.query.sql(dialect=self.snapshot_evaluator.adapter.dialect)}" 2342 ) 2343 2344 self.console.log_status_update("Done.") 2345 return not errors 2346 2347 @python_api_analytics 2348 def rewrite(self, sql: str, dialect: str = "") -> exp.Expr: 2349 """Rewrite a sql expression with semantic references into an executable query. 2350 2351 https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/ 2352 2353 Args: 2354 sql: The sql string to rewrite. 2355 dialect: The dialect of the sql string, defaults to the project dialect. 2356 2357 Returns: 2358 A SQLGlot expression with semantic references expanded. 2359 """ 2360 return rewrite( 2361 sql, 2362 graph=ReferenceGraph(self.models.values()), 2363 metrics=self._metrics, 2364 dialect=dialect or self.default_dialect, 2365 ) 2366 2367 @python_api_analytics 2368 def check_intervals( 2369 self, 2370 environment: t.Optional[str], 2371 no_signals: bool, 2372 select_models: t.Collection[str], 2373 start: t.Optional[TimeLike] = None, 2374 end: t.Optional[TimeLike] = None, 2375 ) -> t.Dict[Snapshot, SnapshotIntervals]: 2376 """Check intervals for a given environment. 2377 2378 Args: 2379 environment: The environment or prod if None. 2380 select_models: A list of model selection strings to show intervals for. 2381 start: The start of the intervals to check. 2382 end: The end of the intervals to check. 2383 """ 2384 2385 environment = environment or c.PROD 2386 env = self.state_reader.get_environment(environment) 2387 if not env: 2388 raise SQLMeshError(f"Environment '{environment}' was not found.") 2389 2390 snapshots = {k.name: v for k, v in self.state_sync.get_snapshots(env.snapshots).items()} 2391 2392 missing = { 2393 k.name: v 2394 for k, v in missing_intervals( 2395 snapshots.values(), start=start, end=end, execution_time=end 2396 ).items() 2397 } 2398 2399 if select_models: 2400 selected: t.Collection[str] = self._select_models_for_run( 2401 select_models, True, snapshots.values() 2402 ) 2403 else: 2404 selected = snapshots.keys() 2405 2406 results = {} 2407 execution_context = self.execution_context(snapshots=snapshots) 2408 2409 for fqn in selected: 2410 snapshot = snapshots[fqn] 2411 intervals = missing.get(fqn) or [] 2412 2413 results[snapshot] = SnapshotIntervals( 2414 snapshot.snapshot_id, 2415 intervals 2416 if no_signals 2417 else snapshot.check_ready_intervals(intervals, execution_context), 2418 ) 2419 2420 return results 2421 2422 @python_api_analytics 2423 def migrate(self) -> None: 2424 """Migrates SQLMesh to the current running version. 2425 2426 Please contact your SQLMesh administrator before doing this. 2427 """ 2428 self.notification_target_manager.notify(NotificationEvent.MIGRATION_START) 2429 self._load_materializations() 2430 try: 2431 self._new_state_sync().migrate( 2432 promoted_snapshots_only=self.config.migration.promoted_snapshots_only, 2433 ) 2434 except Exception as e: 2435 self.notification_target_manager.notify( 2436 NotificationEvent.MIGRATION_FAILURE, traceback.format_exc() 2437 ) 2438 raise e 2439 self.notification_target_manager.notify(NotificationEvent.MIGRATION_END) 2440 2441 @python_api_analytics 2442 def rollback(self) -> None: 2443 """Rolls back SQLMesh to the previous migration. 2444 2445 Please contact your SQLMesh administrator before doing this. This action cannot be undone. 2446 """ 2447 self._new_state_sync().rollback() 2448 2449 @python_api_analytics 2450 def create_external_models(self, strict: bool = False) -> None: 2451 """Create a file to document the schema of external models. 2452 2453 The external models file contains all columns and types of external models, allowing for more 2454 robust lineage, validation, and optimizations. 2455 2456 Args: 2457 strict: If True, raise an error if the external model is missing in the database. 2458 """ 2459 if not self._models: 2460 self.load(update_schemas=False) 2461 2462 for path, config in self.configs.items(): 2463 deprecated_yaml = path / c.EXTERNAL_MODELS_DEPRECATED_YAML 2464 2465 external_models_yaml = ( 2466 path / c.EXTERNAL_MODELS_YAML if not deprecated_yaml.exists() else deprecated_yaml 2467 ) 2468 2469 external_models_gateway: t.Optional[str] = self.gateway or self.config.default_gateway 2470 if not external_models_gateway: 2471 # can happen if there was no --gateway defined and the default_gateway is '' 2472 # which means that the single gateway syntax is being used which means there is 2473 # no named gateway which means we should not stamp `gateway:` on the external models 2474 external_models_gateway = None 2475 2476 create_external_models_file( 2477 path=external_models_yaml, 2478 models=UniqueKeyDict( 2479 "models", 2480 { 2481 fqn: model 2482 for fqn, model in self._models.items() 2483 if self.config_for_node(model) is config 2484 }, 2485 ), 2486 adapter=self.engine_adapter, 2487 state_reader=self.state_reader, 2488 dialect=config.model_defaults.dialect, 2489 gateway=external_models_gateway, 2490 max_workers=self.concurrent_tasks, 2491 strict=strict, 2492 ) 2493 2494 @python_api_analytics 2495 def print_info( 2496 self, skip_connection: bool = False, verbosity: Verbosity = Verbosity.DEFAULT 2497 ) -> None: 2498 """Prints information about connections, models, macros, etc. to the console.""" 2499 self.console.log_status_update(f"Models: {len(self.models)}") 2500 self.console.log_status_update(f"Macros: {len(self._macros) - len(macro.get_registry())}") 2501 2502 if skip_connection: 2503 return 2504 2505 if verbosity >= Verbosity.VERBOSE: 2506 self.console.log_status_update("") 2507 print_config(self.config.get_connection(self.gateway), self.console, "Connection") 2508 print_config( 2509 self.config.get_test_connection(self.gateway), self.console, "Test Connection" 2510 ) 2511 print_config( 2512 self.config.get_state_connection(self.gateway), self.console, "State Connection" 2513 ) 2514 2515 self._try_connection("data warehouse", self.engine_adapter.ping) 2516 state_connection = self.config.get_state_connection(self.gateway) 2517 if state_connection: 2518 self._try_connection("state backend", state_connection.connection_validator()) 2519 2520 @python_api_analytics 2521 def print_environment_names(self) -> None: 2522 """Prints all environment names along with expiry datetime.""" 2523 result = self._new_state_sync().get_environments_summary() 2524 if not result: 2525 raise SQLMeshError( 2526 "This project has no environments. Create an environment using the `sqlmesh plan` command." 2527 ) 2528 self.console.print_environments(result) 2529 2530 def close(self) -> None: 2531 """Releases all resources allocated by this context.""" 2532 if self._snapshot_evaluator: 2533 self._snapshot_evaluator.close() 2534 2535 if self._state_sync: 2536 self._state_sync.close() 2537 2538 def _run( 2539 self, 2540 environment: str, 2541 *, 2542 start: t.Optional[TimeLike], 2543 end: t.Optional[TimeLike], 2544 execution_time: t.Optional[TimeLike], 2545 ignore_cron: bool, 2546 select_models: t.Optional[t.Collection[str]], 2547 circuit_breaker: t.Optional[t.Callable[[], bool]], 2548 no_auto_upstream: bool, 2549 ) -> CompletionStatus: 2550 scheduler = self.scheduler(environment=environment) 2551 snapshots = scheduler.snapshots 2552 2553 if select_models is not None: 2554 select_models = self._select_models_for_run( 2555 select_models, no_auto_upstream, snapshots.values() 2556 ) 2557 2558 completion_status = scheduler.run( 2559 environment, 2560 start=start, 2561 end=end, 2562 execution_time=execution_time, 2563 ignore_cron=ignore_cron, 2564 circuit_breaker=circuit_breaker, 2565 selected_snapshots=select_models, 2566 auto_restatement_enabled=environment.lower() == c.PROD, 2567 run_environment_statements=True, 2568 ) 2569 2570 if completion_status.is_nothing_to_do: 2571 next_run_ready_msg = "" 2572 2573 next_ready_interval_start = get_next_model_interval_start(snapshots.values()) 2574 if next_ready_interval_start: 2575 utc_time = format_tz_datetime(next_ready_interval_start) 2576 local_time = format_tz_datetime(next_ready_interval_start, use_local_timezone=True) 2577 time_msg = local_time if local_time == utc_time else f"{local_time} ({utc_time})" 2578 next_run_ready_msg = f"\n\nNext run will be ready at {time_msg}." 2579 2580 self.console.log_status_update( 2581 f"No models are ready to run. Please wait until a model `cron` interval has elapsed.{next_run_ready_msg}" 2582 ) 2583 2584 return completion_status 2585 2586 def _apply(self, plan: Plan, circuit_breaker: t.Optional[t.Callable[[], bool]]) -> None: 2587 self._scheduler.create_plan_evaluator(self).evaluate( 2588 plan.to_evaluatable(), circuit_breaker=circuit_breaker 2589 ) 2590 2591 @python_api_analytics 2592 def table_name( 2593 self, model_name: str, environment: t.Optional[str] = None, prod: bool = False 2594 ) -> str: 2595 """Returns the name of the pysical table for the given model name in the target environment. 2596 2597 Args: 2598 model_name: The name of the model. 2599 environment: The environment to source the model version from. 2600 prod: If True, return the name of the physical table that will be used in production for the model version 2601 promoted in the target environment. 2602 2603 Returns: 2604 The name of the physical table. 2605 """ 2606 environment = environment or self.config.default_target_environment 2607 fqn = self._node_or_snapshot_to_fqn(model_name) 2608 target_env = self.state_reader.get_environment(environment) 2609 if not target_env: 2610 raise SQLMeshError(f"Environment '{environment}' was not found.") 2611 2612 snapshot_info = None 2613 for s in target_env.snapshots: 2614 if s.name == fqn: 2615 snapshot_info = s 2616 break 2617 if not snapshot_info: 2618 raise SQLMeshError( 2619 f"Model '{model_name}' was not found in environment '{environment}'." 2620 ) 2621 2622 if target_env.name == c.PROD or prod: 2623 return snapshot_info.table_name() 2624 2625 snapshots = self.state_reader.get_snapshots(target_env.snapshots) 2626 deployability_index = DeployabilityIndex.create(snapshots) 2627 2628 return snapshot_info.table_name( 2629 is_deployable=deployability_index.is_deployable(snapshot_info.snapshot_id) 2630 ) 2631 2632 def clear_caches(self) -> None: 2633 paths_to_remove = [path / c.CACHE for path in self.configs] 2634 paths_to_remove.append(self.cache_dir) 2635 2636 if IS_WINDOWS: 2637 paths_to_remove = [fix_windows_path(path) for path in paths_to_remove] 2638 2639 for path in paths_to_remove: 2640 if path.exists(): 2641 rmtree(path) 2642 2643 if isinstance(self._state_sync, CachingStateSync): 2644 self._state_sync.clear_cache() 2645 2646 def export_state( 2647 self, 2648 output_file: Path, 2649 environment_names: t.Optional[t.List[str]] = None, 2650 local_only: bool = False, 2651 confirm: bool = True, 2652 ) -> None: 2653 from sqlmesh.core.state_sync.export_import import export_state 2654 2655 # trigger a connection to the StateSync so we can fail early if there is a problem 2656 # note we still need to do this even if we are doing a local export so we know what 'versions' to write 2657 self.state_sync.get_versions(validate=True) 2658 2659 local_snapshots = self.snapshots if local_only else None 2660 2661 if self.console.start_state_export( 2662 output_file=output_file, 2663 gateway=self.selected_gateway, 2664 state_connection_config=self._state_connection_config, 2665 environment_names=environment_names, 2666 local_only=local_only, 2667 confirm=confirm, 2668 ): 2669 try: 2670 export_state( 2671 state_sync=self.state_sync, 2672 output_file=output_file, 2673 local_snapshots=local_snapshots, 2674 environment_names=environment_names, 2675 console=self.console, 2676 ) 2677 self.console.stop_state_export(success=True, output_file=output_file) 2678 except: 2679 self.console.stop_state_export(success=False, output_file=output_file) 2680 raise 2681 2682 def import_state(self, input_file: Path, clear: bool = False, confirm: bool = True) -> None: 2683 from sqlmesh.core.state_sync.export_import import import_state 2684 2685 if self.console.start_state_import( 2686 input_file=input_file, 2687 gateway=self.selected_gateway, 2688 state_connection_config=self._state_connection_config, 2689 clear=clear, 2690 confirm=confirm, 2691 ): 2692 try: 2693 import_state( 2694 state_sync=self.state_sync, 2695 input_file=input_file, 2696 clear=clear, 2697 console=self.console, 2698 ) 2699 self.console.stop_state_import(success=True, input_file=input_file) 2700 except: 2701 self.console.stop_state_import(success=False, input_file=input_file) 2702 raise 2703 2704 def _run_tests( 2705 self, verbosity: Verbosity = Verbosity.DEFAULT 2706 ) -> t.Tuple[ModelTextTestResult, str]: 2707 test_output_io = StringIO() 2708 result = self.test(stream=test_output_io, verbosity=verbosity) 2709 return result, test_output_io.getvalue() 2710 2711 def _run_plan_tests(self, skip_tests: bool = False) -> t.Optional[ModelTextTestResult]: 2712 if not skip_tests: 2713 result = self.test() 2714 if not result.wasSuccessful(): 2715 raise PlanError( 2716 "Cannot generate plan due to failing test(s). Fix test(s) and run again." 2717 ) 2718 return result 2719 return None 2720 2721 @property 2722 def _model_tables(self) -> t.Dict[str, str]: 2723 """Mapping of model name to physical table name. 2724 2725 If a snapshot has not been versioned yet, its view name will be returned. 2726 """ 2727 return { 2728 fqn: ( 2729 snapshot.table_name() 2730 if snapshot.version 2731 else snapshot.qualified_view_name.for_environment( 2732 EnvironmentNamingInfo.from_environment_catalog_mapping( 2733 self.environment_catalog_mapping, 2734 name=c.PROD, 2735 suffix_target=self.config.environment_suffix_target, 2736 ) 2737 ) 2738 ) 2739 for fqn, snapshot in self.snapshots.items() 2740 } 2741 2742 @cached_property 2743 def cache_dir(self) -> Path: 2744 if self.config.cache_dir: 2745 cache_path = Path(self.config.cache_dir) 2746 if cache_path.is_absolute(): 2747 return cache_path 2748 return self.path / cache_path 2749 2750 # Default to .cache directory in the project path 2751 return self.path / c.CACHE 2752 2753 @cached_property 2754 def engine_adapters(self) -> t.Dict[str, EngineAdapter]: 2755 """Returns all the engine adapters for the gateways defined in the configurations.""" 2756 adapters: t.Dict[str, EngineAdapter] = {self.selected_gateway: self.engine_adapter} 2757 for config in self.configs.values(): 2758 for gateway_name in config.gateways: 2759 if gateway_name not in adapters: 2760 connection = config.get_connection(gateway_name) 2761 adapter = connection.create_engine_adapter( 2762 concurrent_tasks=self.concurrent_tasks, 2763 ) 2764 adapters[gateway_name] = adapter 2765 return adapters 2766 2767 @cached_property 2768 def default_catalog_per_gateway(self) -> t.Dict[str, str]: 2769 """Returns the default catalogs for each engine adapter.""" 2770 return self._scheduler.get_default_catalog_per_gateway(self) 2771 2772 @property 2773 def concurrent_tasks(self) -> int: 2774 if self._concurrent_tasks is None: 2775 self._concurrent_tasks = self.connection_config.concurrent_tasks 2776 return self._concurrent_tasks 2777 2778 @cached_property 2779 def connection_config(self) -> ConnectionConfig: 2780 return self.config.get_connection(self.selected_gateway) 2781 2782 @cached_property 2783 def test_connection_config(self) -> ConnectionConfig: 2784 return self.config.get_test_connection( 2785 self.gateway, 2786 self.default_catalog, 2787 default_catalog_dialect=self.config.dialect, 2788 ) 2789 2790 @cached_property 2791 def environment_catalog_mapping(self) -> RegexKeyDict: 2792 engine_adapter = None 2793 try: 2794 engine_adapter = self.engine_adapter 2795 except Exception: 2796 pass 2797 2798 if ( 2799 self.config.environment_catalog_mapping 2800 and engine_adapter 2801 and not self.engine_adapter.catalog_support.is_multi_catalog_supported 2802 ): 2803 raise SQLMeshError( 2804 "Environment catalog mapping is only supported for engine adapters that support multiple catalogs" 2805 ) 2806 return self.config.environment_catalog_mapping 2807 2808 def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: 2809 if gateway: 2810 if adapter := self.engine_adapters.get(gateway): 2811 return adapter 2812 raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") 2813 return self.engine_adapter 2814 2815 def _snapshots( 2816 self, models_override: t.Optional[UniqueKeyDict[str, Model]] = None 2817 ) -> t.Dict[str, Snapshot]: 2818 nodes = {**(models_override or self._models), **self._standalone_audits} 2819 snapshots = self._nodes_to_snapshots(nodes) 2820 stored_snapshots = self.state_reader.get_snapshots(snapshots.values()) 2821 2822 unrestorable_snapshots = { 2823 snapshot 2824 for snapshot in stored_snapshots.values() 2825 if snapshot.name in nodes and snapshot.unrestorable 2826 } 2827 if unrestorable_snapshots: 2828 for snapshot in unrestorable_snapshots: 2829 logger.info( 2830 "Found a unrestorable snapshot %s. Restamping the model...", snapshot.name 2831 ) 2832 node = nodes[snapshot.name] 2833 nodes[snapshot.name] = node.copy( 2834 update={"stamp": f"revert to {snapshot.identifier}"} 2835 ) 2836 snapshots = self._nodes_to_snapshots(nodes) 2837 stored_snapshots = self.state_reader.get_snapshots(snapshots.values()) 2838 2839 for snapshot in stored_snapshots.values(): 2840 # Keep the original model instance to preserve the query cache. 2841 snapshot.node = snapshots[snapshot.name].node 2842 2843 return {name: stored_snapshots.get(s.snapshot_id, s) for name, s in snapshots.items()} 2844 2845 def _context_diff( 2846 self, 2847 environment: str, 2848 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 2849 create_from: t.Optional[str] = None, 2850 force_no_diff: bool = False, 2851 ensure_finalized_snapshots: bool = False, 2852 diff_rendered: bool = False, 2853 always_recreate_environment: bool = False, 2854 ) -> ContextDiff: 2855 environment = Environment.sanitize_name(environment) 2856 if force_no_diff: 2857 return ContextDiff.create_no_diff(environment, self.state_reader) 2858 2859 return ContextDiff.create( 2860 environment, 2861 snapshots=snapshots or self.snapshots, 2862 create_from=create_from or c.PROD, 2863 state_reader=self.state_reader, 2864 provided_requirements=self._requirements, 2865 excluded_requirements=self._excluded_requirements, 2866 ensure_finalized_snapshots=ensure_finalized_snapshots, 2867 diff_rendered=diff_rendered, 2868 environment_statements=self._environment_statements, 2869 gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, 2870 infer_python_dependencies=self.config.infer_python_dependencies, 2871 always_recreate_environment=always_recreate_environment, 2872 ) 2873 2874 def _destroy(self) -> bool: 2875 # Invalidate all environments, including prod 2876 for environment in self.state_reader.get_environments(): 2877 self.state_sync.invalidate_environment(name=environment.name, protect_prod=False) 2878 self.console.log_success(f"Environment '{environment.name}' invalidated.") 2879 2880 # Run janitor to clean up all objects 2881 self._run_janitor(ignore_ttl=True) 2882 2883 # Remove state tables, including backup tables 2884 self.state_sync.remove_state(including_backup=True) 2885 self.console.log_status_update("State tables removed.") 2886 2887 # Finally clear caches 2888 self.clear_caches() 2889 2890 return True 2891 2892 def _run_janitor(self, ignore_ttl: bool = False) -> None: 2893 current_ts = now_timestamp() 2894 2895 # Clean up expired environments by removing their views and schemas 2896 self._cleanup_environments(current_ts=current_ts) 2897 2898 delete_expired_snapshots( 2899 self.state_sync, 2900 self.snapshot_evaluator, 2901 current_ts=current_ts, 2902 ignore_ttl=ignore_ttl, 2903 console=self.console, 2904 batch_size=self.config.janitor.expired_snapshots_batch_size, 2905 ) 2906 self.state_sync.compact_intervals() 2907 2908 def _cleanup_environments(self, current_ts: t.Optional[int] = None) -> None: 2909 current_ts = current_ts or now_timestamp() 2910 2911 expired_environments_summaries = self.state_sync.get_expired_environments( 2912 current_ts=current_ts 2913 ) 2914 2915 for expired_env_summary in expired_environments_summaries: 2916 expired_env = self.state_reader.get_environment(expired_env_summary.name) 2917 2918 if expired_env: 2919 cleanup_expired_views( 2920 default_adapter=self.engine_adapter, 2921 engine_adapters=self.engine_adapters, 2922 environments=[expired_env], 2923 warn_on_delete_failure=self.config.janitor.warn_on_delete_failure, 2924 console=self.console, 2925 ) 2926 2927 self.state_sync.delete_expired_environments(current_ts=current_ts) 2928 2929 def _try_connection(self, connection_name: str, validator: t.Callable[[], None]) -> None: 2930 connection_name = connection_name.capitalize() 2931 try: 2932 validator() 2933 self.console.log_status_update(f"{connection_name} connection [green]succeeded[/green]") 2934 except Exception as ex: 2935 self.console.log_error(f"{connection_name} connection failed. {ex}") 2936 2937 def _new_state_sync(self) -> StateSync: 2938 return self._provided_state_sync or self._scheduler.create_state_sync(self) 2939 2940 def _new_selector( 2941 self, models: t.Optional[UniqueKeyDict[str, Model]] = None, dag: t.Optional[DAG[str]] = None 2942 ) -> Selector: 2943 return self._selector_cls( 2944 self.state_reader, 2945 models=models or self._models, 2946 context_path=self.path, 2947 dag=dag, 2948 default_catalog=self.default_catalog, 2949 dialect=self.default_dialect, 2950 cache_dir=self.cache_dir, 2951 ) 2952 2953 def _register_notification_targets(self) -> None: 2954 event_notifications = collections.defaultdict(set) 2955 for target in self.notification_targets: 2956 if target.is_configured: 2957 for event in target.notify_on: 2958 event_notifications[event].add(target) 2959 user_notification_targets = { 2960 user.username: set( 2961 target for target in user.notification_targets if target.is_configured 2962 ) 2963 for user in self.users 2964 } 2965 self.notification_target_manager = NotificationTargetManager( 2966 event_notifications, user_notification_targets, username=self.config.username 2967 ) 2968 2969 def _load_materializations(self) -> None: 2970 if not self._loaded: 2971 for loader in self._loaders: 2972 loader.load_materializations() 2973 2974 def _select_models_for_run( 2975 self, 2976 select_models: t.Collection[str], 2977 no_auto_upstream: bool, 2978 snapshots: t.Collection[Snapshot], 2979 ) -> t.Set[str]: 2980 models: UniqueKeyDict[str, Model] = UniqueKeyDict( 2981 "models", **{s.name: s.model for s in snapshots if s.is_model} 2982 ) 2983 dag: DAG[str] = DAG() 2984 for fqn, model in models.items(): 2985 dag.add(fqn, model.depends_on) 2986 model_selector = self._new_selector(models=models, dag=dag) 2987 result = set(model_selector.expand_model_selections(select_models)) 2988 if not no_auto_upstream: 2989 result = set(dag.subdag(*result)) 2990 return result 2991 2992 @cached_property 2993 def _project_type(self) -> str: 2994 project_types = { 2995 c.DBT if loader.__class__.__name__.lower().startswith(c.DBT) else c.NATIVE 2996 for loader in self._loaders 2997 } 2998 return c.HYBRID if len(project_types) > 1 else first(project_types) 2999 3000 def _nodes_to_snapshots(self, nodes: t.Dict[str, Node]) -> t.Dict[str, Snapshot]: 3001 snapshots: t.Dict[str, Snapshot] = {} 3002 fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {} 3003 3004 for node in nodes.values(): 3005 kwargs: t.Dict[str, t.Any] = {} 3006 if node.project in self._projects: 3007 config = self.config_for_node(node) 3008 kwargs["ttl"] = config.snapshot_ttl 3009 kwargs["table_naming_convention"] = config.physical_table_naming_convention 3010 3011 snapshot = Snapshot.from_node( 3012 node, 3013 nodes=nodes, 3014 cache=fingerprint_cache, 3015 **kwargs, 3016 ) 3017 snapshots[snapshot.name] = snapshot 3018 return snapshots 3019 3020 def _node_or_snapshot_to_fqn(self, node_or_snapshot: NodeOrSnapshot) -> str: 3021 if isinstance(node_or_snapshot, Snapshot): 3022 return node_or_snapshot.name 3023 if isinstance(node_or_snapshot, str) and not self.standalone_audits.get(node_or_snapshot): 3024 return normalize_model_name( 3025 node_or_snapshot, 3026 dialect=self.default_dialect, 3027 default_catalog=self.default_catalog, 3028 ) 3029 if not isinstance(node_or_snapshot, str): 3030 return node_or_snapshot.fqn 3031 return node_or_snapshot 3032 3033 @property 3034 def _plan_preview_enabled(self) -> bool: 3035 if self.config.plan.enable_preview is not None: 3036 return self.config.plan.enable_preview 3037 # It is dangerous to enable preview by default for dbt projects that rely on engines that don't support cloning. 3038 # Enabling previews in such cases can result in unintended full refreshes because dbt incremental models rely on 3039 # the maximum timestamp value in the target table. 3040 return self._project_type == c.NATIVE or self.engine_adapter.SUPPORTS_CLONING 3041 3042 def _get_plan_default_start_end( 3043 self, 3044 snapshots: t.Dict[str, Snapshot], 3045 max_interval_end_per_model: t.Dict[str, datetime], 3046 backfill_models: t.Optional[t.Set[str]], 3047 modified_model_names: t.Set[str], 3048 execution_time: t.Optional[TimeLike] = None, 3049 ) -> t.Tuple[t.Optional[int], t.Optional[int]]: 3050 # exclude seeds so their stale interval ends does not become the default plan end date 3051 # when they're the only ones that contain intervals in this plan 3052 non_seed_interval_ends = { 3053 model_fqn: end 3054 for model_fqn, end in max_interval_end_per_model.items() 3055 if model_fqn not in snapshots or not snapshots[model_fqn].is_seed 3056 } 3057 if not non_seed_interval_ends: 3058 return None, None 3059 3060 default_end = to_timestamp(max(non_seed_interval_ends.values())) 3061 default_start: t.Optional[int] = None 3062 # Infer the default start by finding the smallest interval start that corresponds to the default end. 3063 for model_name in backfill_models or modified_model_names or max_interval_end_per_model: 3064 if model_name not in snapshots: 3065 continue 3066 node = snapshots[model_name].node 3067 interval_unit = node.interval_unit 3068 default_start = min( 3069 default_start or sys.maxsize, 3070 to_timestamp( 3071 interval_unit.cron_prev( 3072 interval_unit.cron_floor( 3073 max_interval_end_per_model.get( 3074 model_name, node.cron_floor(default_end) 3075 ), 3076 ), 3077 estimate=True, 3078 ) 3079 ), 3080 ) 3081 3082 if execution_time and to_timestamp(default_end) > to_timestamp(execution_time): 3083 # the end date can't be in the future, which can happen if a specific `execution_time` is set and prod intervals 3084 # are newer than it 3085 default_end = to_timestamp(execution_time) 3086 3087 return default_start, default_end 3088 3089 def _calculate_start_override_per_model( 3090 self, 3091 min_intervals: t.Optional[int], 3092 plan_start: t.Optional[TimeLike], 3093 plan_end: t.Optional[TimeLike], 3094 plan_execution_time: TimeLike, 3095 backfill_model_fqns: t.Optional[t.Set[str]], 3096 snapshots_by_model_fqn: t.Dict[str, Snapshot], 3097 end_override_per_model: t.Optional[t.Dict[str, datetime]], 3098 ) -> t.Dict[str, datetime]: 3099 if not min_intervals or not backfill_model_fqns or not plan_start: 3100 # If there are no models to backfill, there are no intervals to consider for backfill, so we dont need to consider a minimum number 3101 # If the plan doesnt have a start date, all intervals are considered already so we dont need to consider a minimum number 3102 # If we dont have a minimum number of intervals to consider, then we dont need to adjust the start date on a per-model basis 3103 return {} 3104 3105 start_overrides: t.Dict[str, datetime] = {} 3106 end_override_per_model = end_override_per_model or {} 3107 3108 plan_execution_time_dt = to_datetime(plan_execution_time) 3109 plan_start_dt = to_datetime(plan_start, relative_base=plan_execution_time_dt) 3110 plan_end_dt = to_datetime( 3111 plan_end or plan_execution_time_dt, relative_base=plan_execution_time_dt 3112 ) 3113 3114 # we need to take the DAG into account so that parent models can be expanded to cover at least as much as their children 3115 # for example, A(hourly) <- B(daily) 3116 # if min_intervals=1, A would have 1 hour and B would have 1 day 3117 # but B depends on A so in order for B to have 1 valid day, A needs to be expanded to 24 hours 3118 backfill_dag: DAG[str] = DAG() 3119 for fqn in backfill_model_fqns: 3120 backfill_dag.add( 3121 fqn, 3122 [ 3123 p.name 3124 for p in snapshots_by_model_fqn[fqn].parents 3125 if p.name in backfill_model_fqns 3126 ], 3127 ) 3128 3129 # start from the leaf nodes and work back towards the root because the min_start at the root node is determined by the calculated starts in the leaf nodes 3130 reversed_dag = backfill_dag.reversed 3131 graph = reversed_dag.graph 3132 3133 for model_fqn in reversed_dag: 3134 # Get the earliest start from all immediate children of this snapshot 3135 # this works because topological ordering guarantees that they've already been visited 3136 # and we always set a start override 3137 min_child_start = min( 3138 [start_overrides[immediate_child_fqn] for immediate_child_fqn in graph[model_fqn]], 3139 default=plan_start_dt, 3140 ) 3141 3142 snapshot = snapshots_by_model_fqn.get(model_fqn) 3143 3144 if not snapshot: 3145 continue 3146 3147 starting_point = end_override_per_model.get(model_fqn, plan_end_dt) 3148 if node_end := snapshot.node.end: 3149 # if we dont do this, if the node end is a *date* (as opposed to a timestamp) 3150 # we end up incorrectly winding back an extra day 3151 node_end_dt = make_exclusive(node_end) 3152 3153 if node_end_dt < plan_end_dt: 3154 # if the model has an end date that has already elapsed, use that as a starting point for calculating min_intervals 3155 # instead of the plan end. If we use the plan end, we will return intervals in the future which are invalid 3156 starting_point = node_end_dt 3157 3158 snapshot_start = snapshot.node.cron_floor(starting_point) 3159 3160 for _ in range(min_intervals): 3161 # wind back the starting point by :min_intervals intervals to arrive at the minimum snapshot start date 3162 snapshot_start = snapshot.node.cron_prev(snapshot_start) 3163 3164 start_overrides[model_fqn] = min(min_child_start, snapshot_start) 3165 3166 return start_overrides 3167 3168 def _get_max_interval_end_per_model( 3169 self, snapshots: t.Dict[str, Snapshot], backfill_models: t.Optional[t.Set[str]] 3170 ) -> t.Dict[str, datetime]: 3171 models_for_interval_end = ( 3172 self._get_models_for_interval_end(snapshots, backfill_models) 3173 if backfill_models is not None 3174 else None 3175 ) 3176 return { 3177 model_fqn: to_datetime(ts) 3178 for model_fqn, ts in self.state_sync.max_interval_end_per_model( 3179 c.PROD, 3180 models=models_for_interval_end, 3181 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 3182 ).items() 3183 } 3184 3185 @staticmethod 3186 def _get_models_for_interval_end( 3187 snapshots: t.Dict[str, Snapshot], backfill_models: t.Set[str] 3188 ) -> t.Set[str]: 3189 models_for_interval_end = set() 3190 models_stack = list(backfill_models) 3191 while models_stack: 3192 next_model = models_stack.pop() 3193 if next_model not in snapshots: 3194 continue 3195 models_for_interval_end.add(next_model) 3196 models_stack.extend( 3197 s.name 3198 for s in snapshots[next_model].parents 3199 if s.name not in models_for_interval_end 3200 ) 3201 return models_for_interval_end 3202 3203 def lint_models( 3204 self, 3205 models: t.Optional[t.Iterable[t.Union[str, Model]]] = None, 3206 raise_on_error: bool = True, 3207 ) -> t.List[AnnotatedRuleViolation]: 3208 found_error = False 3209 3210 model_list = ( 3211 list(self.get_model(model, raise_if_missing=True) for model in models) 3212 if models 3213 else self.models.values() 3214 ) 3215 all_violations = [] 3216 for model in model_list: 3217 # Linter may be `None` if the context is not loaded yet 3218 if linter := self._linters.get(model.project): 3219 lint_violation, violations = ( 3220 linter.lint_model(model, self, console=self.console) or found_error 3221 ) 3222 if lint_violation: 3223 found_error = True 3224 all_violations.extend(violations) 3225 3226 if raise_on_error and found_error: 3227 raise LinterError( 3228 "Linter detected errors in the code. Please fix them before proceeding." 3229 ) 3230 3231 return all_violations 3232 3233 def select_tests( 3234 self, 3235 tests: t.Optional[t.List[str]] = None, 3236 patterns: t.Optional[t.List[str]] = None, 3237 ) -> t.List[ModelTestMetadata]: 3238 """Filter pre-loaded test metadata based on tests and patterns.""" 3239 3240 test_meta = self._model_test_metadata 3241 3242 if tests: 3243 filtered_tests = [] 3244 for test in tests: 3245 if "::" in test: 3246 if test in self._model_test_metadata_fully_qualified_name_index: 3247 filtered_tests.append( 3248 self._model_test_metadata_fully_qualified_name_index[test] 3249 ) 3250 else: 3251 test_path = Path(test) 3252 if test_path in self._model_test_metadata_path_index: 3253 filtered_tests.extend(self._model_test_metadata_path_index[test_path]) 3254 3255 test_meta = filtered_tests 3256 3257 if patterns: 3258 test_meta = filter_tests_by_patterns(test_meta, patterns) 3259 3260 return test_meta
Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.
Arguments:
- notification_targets: The notification target to use. Defaults to what is defined in config.
- paths: The directories containing SQLMesh files.
- config: A Config object or the name of a Config object in config.py.
- connection: The name of the connection. If not specified the first connection as it appears in configuration will be used.
- test_connection: The name of the connection to use for tests. If not specified the first connection as it appears in configuration will be used.
- concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
- load: Whether or not to automatically load all models and macros (default True).
- console: The rich instance used for printing out CLI command results.
- users: A list of users to make known to SQLMesh.
377 def __init__( 378 self, 379 notification_targets: t.Optional[t.List[NotificationTarget]] = None, 380 state_sync: t.Optional[StateSync] = None, 381 paths: t.Union[str | Path, t.Iterable[str | Path]] = "", 382 config: t.Optional[t.Union[C, str, t.Dict[Path, C]]] = None, 383 gateway: t.Optional[str] = None, 384 concurrent_tasks: t.Optional[int] = None, 385 loader: t.Optional[t.Type[Loader]] = None, 386 load: bool = True, 387 users: t.Optional[t.List[User]] = None, 388 config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None, 389 selector: t.Optional[t.Type[Selector]] = None, 390 ): 391 self.configs = ( 392 config 393 if isinstance(config, dict) 394 else load_configs(config, self.CONFIG_TYPE, paths, **(config_loader_kwargs or {})) 395 ) 396 self._projects = {config.project for config in self.configs.values()} 397 self.dag: DAG[str] = DAG() 398 self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") 399 self._audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("audits") 400 self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( 401 "standaloneaudits" 402 ) 403 self._model_test_metadata: t.List[ModelTestMetadata] = [] 404 self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {} 405 self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {} 406 self._models_with_tests: t.Set[str] = set() 407 408 self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") 409 self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") 410 self._jinja_macros = JinjaMacroRegistry() 411 self._requirements: t.Dict[str, str] = {} 412 self._environment_statements: t.List[EnvironmentStatements] = [] 413 self._excluded_requirements: t.Set[str] = set() 414 self._engine_adapter: t.Optional[EngineAdapter] = None 415 self._linters: t.Dict[str, Linter] = {} 416 self._loaded: bool = False 417 self._selector_cls = selector or NativeSelector 418 419 self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items()))) 420 421 self._all_dialects: t.Set[str] = {self.config.dialect or ""} 422 423 if self.config.disable_anonymized_analytics: 424 analytics.disable_analytics() 425 426 self.gateway = gateway 427 self._scheduler = self.config.get_scheduler(self.gateway) 428 self.environment_ttl = self.config.environment_ttl 429 self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments) 430 self.auto_categorize_changes = self.config.plan.auto_categorize_changes 431 self.selected_gateway = (gateway or self.config.default_gateway_name).lower() 432 433 gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults 434 if gw_model_defaults: 435 # Merge global model defaults with the selected gateway's, if it's overriden 436 global_defaults = self.config.model_defaults.model_dump(exclude_unset=True) 437 gateway_defaults = gw_model_defaults.model_dump(exclude_unset=True) 438 439 self.config.model_defaults = ModelDefaultsConfig( 440 **{**global_defaults, **gateway_defaults} 441 ) 442 443 # This allows overriding the default dialect's normalization strategy, so for example 444 # one can do `dialect="duckdb,normalization_strategy=lowercase"` and this will be 445 # applied to the DuckDB dialect globally 446 if "normalization_strategy" in str(self.config.dialect): 447 dialect = Dialect.get_or_raise(self.config.dialect) 448 type(dialect).NORMALIZATION_STRATEGY = dialect.normalization_strategy 449 450 self._loaders = [ 451 (loader or config.loader)(self, path, **config.loader_kwargs) 452 for path, config in self.configs.items() 453 ] 454 455 self._concurrent_tasks = concurrent_tasks 456 self._state_connection_config = ( 457 self.config.get_state_connection(self.gateway) or self.connection_config 458 ) 459 460 self._snapshot_evaluator: t.Optional[SnapshotEvaluator] = None 461 462 self.console = get_console() 463 setattr(self.console, "dialect", self.config.dialect) 464 465 self._provided_state_sync: t.Optional[StateSync] = state_sync 466 self._state_sync: t.Optional[StateSync] = None 467 468 # Should we dedupe notification_targets? If so how? 469 self.notification_targets = (notification_targets or []) + self.config.notification_targets 470 self.users = (users or []) + self.config.users 471 self.users = list({user.username: user for user in self.users}.values()) 472 self._register_notification_targets() 473 474 if load: 475 self.load()
The type of plan builder object to use (default: PlanBuilder).
488 @property 489 def snapshot_evaluator(self) -> SnapshotEvaluator: 490 if not self._snapshot_evaluator: 491 self._snapshot_evaluator = SnapshotEvaluator( 492 { 493 gateway: adapter.with_settings(execute_log_level=logging.INFO) 494 for gateway, adapter in self.engine_adapters.items() 495 }, 496 ddl_concurrent_tasks=self.concurrent_tasks, 497 selected_gateway=self.selected_gateway, 498 ) 499 return self._snapshot_evaluator
501 def execution_context( 502 self, 503 deployability_index: t.Optional[DeployabilityIndex] = None, 504 engine_adapter: t.Optional[EngineAdapter] = None, 505 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 506 ) -> ExecutionContext: 507 """Returns an execution context.""" 508 return ExecutionContext( 509 engine_adapter=engine_adapter or self.engine_adapter, 510 snapshots=snapshots or self.snapshots, 511 deployability_index=deployability_index, 512 default_dialect=self.default_dialect, 513 default_catalog=self.default_catalog, 514 )
Returns an execution context.
516 @python_api_analytics 517 def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model: 518 """Update or insert a model. 519 520 The context's models dictionary will be updated to include these changes. 521 522 Args: 523 model: Model name or instance to update. 524 kwargs: The kwargs to update the model with. 525 526 Returns: 527 A new instance of the updated or inserted model. 528 """ 529 model = self.get_model(model, raise_if_missing=True) 530 if not model.enabled: 531 raise SQLMeshError(f"The disabled model '{model.name}' cannot be upserted") 532 path = model._path 533 534 model = model.copy(update=kwargs) 535 model._path = path 536 537 self.dag.add(model.fqn, model.depends_on) 538 539 self._models.update( 540 { 541 model.fqn: model, 542 # bust the fingerprint cache for all downstream models 543 **{fqn: self._models[fqn].copy() for fqn in self.dag.downstream(model.fqn)}, 544 } 545 ) 546 547 update_model_schemas( 548 self.dag, 549 models=self._models, 550 cache_dir=self.cache_dir, 551 ) 552 553 if model.dialect: 554 self._all_dialects.add(model.dialect) 555 556 model.validate_definition() 557 558 return model
Update or insert a model.
The context's models dictionary will be updated to include these changes.
Arguments:
- model: Model name or instance to update.
- kwargs: The kwargs to update the model with.
Returns:
A new instance of the updated or inserted model.
560 def scheduler( 561 self, 562 environment: t.Optional[str] = None, 563 snapshot_evaluator: t.Optional[SnapshotEvaluator] = None, 564 ) -> Scheduler: 565 """Returns the built-in scheduler. 566 567 Args: 568 environment: The target environment to source model snapshots from, or None 569 if snapshots should be sourced from the currently loaded local state. 570 571 Returns: 572 The built-in scheduler instance. 573 """ 574 snapshots: t.Iterable[Snapshot] 575 if environment is not None: 576 stored_environment = self.state_sync.get_environment(environment) 577 if stored_environment is None: 578 raise ConfigError(f"Environment '{environment}' was not found.") 579 snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values() 580 else: 581 snapshots = self.snapshots.values() 582 583 if not snapshots: 584 raise ConfigError("No models were found") 585 586 return self.create_scheduler(snapshots, snapshot_evaluator or self.snapshot_evaluator)
Returns the built-in scheduler.
Arguments:
- environment: The target environment to source model snapshots from, or None if snapshots should be sourced from the currently loaded local state.
Returns:
The built-in scheduler instance.
588 def create_scheduler( 589 self, snapshots: t.Iterable[Snapshot], snapshot_evaluator: SnapshotEvaluator 590 ) -> Scheduler: 591 """Creates the built-in scheduler. 592 593 Args: 594 snapshots: The snapshots to schedule. 595 596 Returns: 597 The built-in scheduler instance. 598 """ 599 return Scheduler( 600 snapshots, 601 snapshot_evaluator, 602 self.state_sync, 603 default_catalog=self.default_catalog, 604 max_workers=self.concurrent_tasks, 605 console=self.console, 606 notification_target_manager=self.notification_target_manager, 607 )
Creates the built-in scheduler.
Arguments:
- snapshots: The snapshots to schedule.
Returns:
The built-in scheduler instance.
609 @property 610 def state_sync(self) -> StateSync: 611 if not self._state_sync: 612 self._state_sync = self._new_state_sync() 613 614 if self._state_sync.get_versions(validate=False).schema_version == 0: 615 self.console.log_status_update("Initializing new project state...") 616 self._state_sync.migrate() 617 self._state_sync.get_versions() 618 self._state_sync = CachingStateSync(self._state_sync) # type: ignore 619 return self._state_sync
625 def refresh(self) -> None: 626 """Refresh all models that have been updated.""" 627 if any(loader.reload_needed() for loader in self._loaders): 628 self.load()
Refresh all models that have been updated.
630 def load(self, update_schemas: bool = True) -> GenericContext[C]: 631 """Load all files in the context's path.""" 632 load_start_ts = time.perf_counter() 633 634 loaded_projects = [loader.load() for loader in self._loaders] 635 636 self.dag = DAG() 637 self._standalone_audits.clear() 638 self._audits.clear() 639 self._macros.clear() 640 self._models.clear() 641 self._metrics.clear() 642 self._requirements.clear() 643 self._excluded_requirements.clear() 644 self._linters.clear() 645 self._environment_statements = [] 646 self._model_test_metadata.clear() 647 self._model_test_metadata_path_index.clear() 648 self._model_test_metadata_fully_qualified_name_index.clear() 649 self._models_with_tests.clear() 650 651 for loader, project in zip(self._loaders, loaded_projects): 652 self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) 653 self._macros.update(project.macros) 654 self._models.update(project.models) 655 self._metrics.update(project.metrics) 656 self._audits.update(project.audits) 657 self._standalone_audits.update(project.standalone_audits) 658 self._requirements.update(project.requirements) 659 self._excluded_requirements.update(project.excluded_requirements) 660 self._environment_statements.extend(project.environment_statements) 661 662 self._model_test_metadata.extend(project.model_test_metadata) 663 for metadata in project.model_test_metadata: 664 if metadata.path not in self._model_test_metadata_path_index: 665 self._model_test_metadata_path_index[metadata.path] = [] 666 self._model_test_metadata_path_index[metadata.path].append(metadata) 667 self._model_test_metadata_fully_qualified_name_index[ 668 metadata.fully_qualified_test_name 669 ] = metadata 670 self._models_with_tests.add(metadata.model_name) 671 672 config = loader.config 673 self._linters[config.project] = Linter.from_rules( 674 BUILTIN_RULES.union(project.user_rules), config.linter 675 ) 676 677 # Load environment statements from state for projects not in current load 678 if any(self._projects): 679 prod = self.state_reader.get_environment(c.PROD) 680 if prod: 681 existing_statements = self.state_reader.get_environment_statements(c.PROD) 682 for stmt in existing_statements: 683 if stmt.project and stmt.project not in self._projects: 684 self._environment_statements.append(stmt) 685 686 uncached = set() 687 688 if any(self._projects): 689 prod = self.state_reader.get_environment(c.PROD) 690 691 if prod: 692 for snapshot in self.state_reader.get_snapshots(prod.snapshots).values(): 693 if snapshot.node.project in self._projects: 694 uncached.add(snapshot.name) 695 else: 696 local_store = self._standalone_audits if snapshot.is_audit else self._models 697 if snapshot.name in local_store: 698 uncached.add(snapshot.name) 699 else: 700 local_store[snapshot.name] = snapshot.node # type: ignore 701 702 for model in self._models.values(): 703 self.dag.add(model.fqn, model.depends_on) 704 705 if update_schemas: 706 for fqn in self.dag: 707 model = self._models.get(fqn) # type: ignore 708 709 if not model or fqn in uncached: 710 continue 711 712 # make a copy of remote models that depend on local models or in the downstream chain 713 # without this, a SELECT * FROM local will not propogate properly because the downstream 714 # model will get mutated (schema changes) but the object is the same as the remote cache 715 if any(dep in uncached for dep in model.depends_on): 716 uncached.add(fqn) 717 self._models.update({fqn: model.copy(update={"mapping_schema": {}})}) 718 continue 719 720 update_model_schemas( 721 self.dag, 722 models=self._models, 723 cache_dir=self.cache_dir, 724 ) 725 726 models = self.models.values() 727 for model in models: 728 # The model definition can be validated correctly only after the schema is set. 729 model.validate_definition() 730 731 duplicates = set(self._models) & set(self._standalone_audits) 732 if duplicates: 733 raise ConfigError( 734 f"Models and Standalone audits cannot have the same name: {duplicates}" 735 ) 736 737 self._all_dialects = {m.dialect for m in self._models.values() if m.dialect} | { 738 self.default_dialect or "" 739 } 740 741 analytics.collector.on_project_loaded( 742 project_type=self._project_type, 743 models_count=len(self._models), 744 audits_count=len(self._audits), 745 standalone_audits_count=len(self._standalone_audits), 746 macros_count=len(self._macros), 747 jinja_macros_count=len(self._jinja_macros.root_macros), 748 load_time_sec=time.perf_counter() - load_start_ts, 749 state_sync_fingerprint=self._scheduler.state_sync_fingerprint(self), 750 project_name=self.config.project, 751 ) 752 753 self._loaded = True 754 return self
Load all files in the context's path.
756 @python_api_analytics 757 def run( 758 self, 759 environment: t.Optional[str] = None, 760 *, 761 start: t.Optional[TimeLike] = None, 762 end: t.Optional[TimeLike] = None, 763 execution_time: t.Optional[TimeLike] = None, 764 skip_janitor: bool = False, 765 ignore_cron: bool = False, 766 select_models: t.Optional[t.Collection[str]] = None, 767 exit_on_env_update: t.Optional[int] = None, 768 no_auto_upstream: bool = False, 769 ) -> CompletionStatus: 770 """Run the entire dag through the scheduler. 771 772 Args: 773 environment: The target environment to source model snapshots from and virtually update. Default: prod. 774 start: The start of the interval to render. 775 end: The end of the interval to render. 776 execution_time: The date/time time reference to use for execution time. Defaults to now. 777 skip_janitor: Whether to skip the janitor task. 778 ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals. 779 select_models: A list of model selection expressions to filter models that should run. Note that 780 upstream dependencies of selected models will also be evaluated. 781 exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update 782 to the target environment. 783 no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`. 784 785 Returns: 786 True if the run was successful, False otherwise. 787 """ 788 environment = environment or self.config.default_target_environment 789 environment = Environment.sanitize_name(environment) 790 if not skip_janitor and environment.lower() == c.PROD: 791 self._run_janitor() 792 793 self.notification_target_manager.notify( 794 NotificationEvent.RUN_START, environment=environment 795 ) 796 analytics_run_id = analytics.collector.on_run_start( 797 engine_type=self.snapshot_evaluator.adapter.dialect, 798 state_sync_type=self.state_sync.state_type(), 799 ) 800 self._load_materializations() 801 802 env_check_attempts_num = max( 803 1, 804 self.config.run.environment_check_max_wait 805 // self.config.run.environment_check_interval, 806 ) 807 808 def _block_until_finalized() -> str: 809 for _ in range(env_check_attempts_num): 810 assert environment is not None # mypy 811 environment_state = self.state_sync.get_environment(environment) 812 if not environment_state: 813 raise SQLMeshError(f"Environment '{environment}' was not found.") 814 if environment_state.finalized_ts: 815 return environment_state.plan_id 816 self.console.log_warning( 817 f"Environment '{environment}' is being updated by plan '{environment_state.plan_id}'. " 818 f"Retrying in {self.config.run.environment_check_interval} seconds..." 819 ) 820 time.sleep(self.config.run.environment_check_interval) 821 raise SQLMeshError( 822 f"Exceeded the maximum wait time for environment '{environment}' to be ready. " 823 "This means that the environment either failed to update or the update is taking longer than expected. " 824 "See https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#run to adjust the timeout settings." 825 ) 826 827 success = False 828 interrupted = False 829 done = False 830 while not done: 831 plan_id_at_start = _block_until_finalized() 832 833 def _has_environment_changed() -> bool: 834 assert environment is not None # mypy 835 current_environment_state = self.state_sync.get_environment(environment) 836 return ( 837 not current_environment_state 838 or current_environment_state.plan_id != plan_id_at_start 839 or not current_environment_state.finalized_ts 840 ) 841 842 try: 843 completion_status = self._run( 844 environment, 845 start=start, 846 end=end, 847 execution_time=execution_time, 848 ignore_cron=ignore_cron, 849 select_models=select_models, 850 circuit_breaker=_has_environment_changed, 851 no_auto_upstream=no_auto_upstream, 852 ) 853 done = True 854 except CircuitBreakerError: 855 self.console.log_warning( 856 f"Environment '{environment}' modified while running. Restarting the run..." 857 ) 858 if exit_on_env_update: 859 interrupted = True 860 done = True 861 except Exception as e: 862 self.notification_target_manager.notify( 863 NotificationEvent.RUN_FAILURE, traceback.format_exc() 864 ) 865 logger.info("Run failed.", exc_info=e) 866 analytics.collector.on_run_end( 867 run_id=analytics_run_id, succeeded=False, interrupted=False, error=e 868 ) 869 raise e 870 871 if completion_status.is_success or interrupted: 872 self.notification_target_manager.notify( 873 NotificationEvent.RUN_END, environment=environment 874 ) 875 self.console.log_success(f"Run finished for environment '{environment}'") 876 elif completion_status.is_failure: 877 self.notification_target_manager.notify( 878 NotificationEvent.RUN_FAILURE, "See console logs for details." 879 ) 880 881 analytics.collector.on_run_end( 882 run_id=analytics_run_id, succeeded=success, interrupted=interrupted 883 ) 884 885 if interrupted and exit_on_env_update is not None: 886 sys.exit(exit_on_env_update) 887 888 return completion_status
Run the entire dag through the scheduler.
Arguments:
- environment: The target environment to source model snapshots from and virtually update. Default: prod.
- start: The start of the interval to render.
- end: The end of the interval to render.
- execution_time: The date/time time reference to use for execution time. Defaults to now.
- skip_janitor: Whether to skip the janitor task.
- ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals.
- select_models: A list of model selection expressions to filter models that should run. Note that upstream dependencies of selected models will also be evaluated.
- exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update to the target environment.
- no_auto_upstream: Whether to not force upstream models to run. Only applicable when using
select_models.
Returns:
True if the run was successful, False otherwise.
903 @python_api_analytics 904 def destroy(self) -> bool: 905 success = False 906 907 # Collect resources to be deleted 908 environments = self.state_reader.get_environments() 909 schemas_to_delete = set() 910 tables_to_delete = set() 911 views_to_delete = set() 912 all_snapshot_infos = set() 913 914 # For each environment find schemas and tables 915 for environment in environments: 916 all_snapshot_infos.update(environment.snapshots) 917 snapshots = self.state_reader.get_snapshots(environment.snapshots).values() 918 for snapshot in snapshots: 919 if snapshot.is_model and not snapshot.is_symbolic: 920 # Get the appropriate adapter 921 if environment.gateway_managed and snapshot.model_gateway: 922 adapter = self.engine_adapters.get( 923 snapshot.model_gateway, self.engine_adapter 924 ) 925 else: 926 adapter = self.engine_adapter 927 928 if environment.suffix_target.is_schema or environment.suffix_target.is_catalog: 929 schema = snapshot.qualified_view_name.schema_for_environment( 930 environment.naming_info, dialect=adapter.dialect 931 ) 932 catalog = snapshot.qualified_view_name.catalog_for_environment( 933 environment.naming_info, dialect=adapter.dialect 934 ) 935 if catalog: 936 schemas_to_delete.add(f"{catalog}.{schema}") 937 else: 938 schemas_to_delete.add(schema) 939 940 if environment.suffix_target.is_table: 941 view_name = snapshot.qualified_view_name.for_environment( 942 environment.naming_info, dialect=adapter.dialect 943 ) 944 views_to_delete.add(view_name) 945 946 # Add snapshot tables 947 table_name = snapshot.table_name() 948 tables_to_delete.add(table_name) 949 950 if self.console.start_destroy(schemas_to_delete, views_to_delete, tables_to_delete): 951 try: 952 success = self._destroy() 953 finally: 954 self.console.stop_destroy(success=success) 955 956 return success
970 def get_model( 971 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: bool = False 972 ) -> t.Optional[Model]: 973 """Returns a model with the given name or None if a model with such name doesn't exist. 974 975 Args: 976 model_or_snapshot: A model name, model, or snapshot. 977 raise_if_missing: Raises an error if a model is not found. 978 979 Returns: 980 The expected model. 981 """ 982 if isinstance(model_or_snapshot, Snapshot): 983 return model_or_snapshot.model 984 if not isinstance(model_or_snapshot, str): 985 return model_or_snapshot 986 987 try: 988 # We should try all dialects referenced in the project for cases when models use mixed dialects. 989 for dialect in self._all_dialects: 990 normalized_name = normalize_model_name( 991 model_or_snapshot, 992 dialect=dialect, 993 default_catalog=self.default_catalog, 994 ) 995 if normalized_name in self._models: 996 return self._models[normalized_name] 997 except: 998 pass 999 1000 if raise_if_missing: 1001 if model_or_snapshot.endswith((".sql", ".py")): 1002 msg = "Resolving models by path is not supported, please pass in the model name instead." 1003 else: 1004 msg = f"Cannot find model with name '{model_or_snapshot}'" 1005 1006 raise SQLMeshError(msg) 1007 1008 return None
Returns a model with the given name or None if a model with such name doesn't exist.
Arguments:
- model_or_snapshot: A model name, model, or snapshot.
- raise_if_missing: Raises an error if a model is not found.
Returns:
The expected model.
1023 def get_snapshot( 1024 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: bool = False 1025 ) -> t.Optional[Snapshot]: 1026 """Returns a snapshot with the given name or None if a snapshot with such name doesn't exist. 1027 1028 Args: 1029 node_or_snapshot: A node name, node, or snapshot. 1030 raise_if_missing: Raises an error if a snapshot is not found. 1031 1032 Returns: 1033 The expected snapshot. 1034 """ 1035 if isinstance(node_or_snapshot, Snapshot): 1036 return node_or_snapshot 1037 fqn = self._node_or_snapshot_to_fqn(node_or_snapshot) 1038 snapshot = self.snapshots.get(fqn) 1039 1040 if raise_if_missing and not snapshot: 1041 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1042 1043 return snapshot
Returns a snapshot with the given name or None if a snapshot with such name doesn't exist.
Arguments:
- node_or_snapshot: A node name, node, or snapshot.
- raise_if_missing: Raises an error if a snapshot is not found.
Returns:
The expected snapshot.
1045 def config_for_path(self, path: Path) -> t.Tuple[Config, Path]: 1046 """Returns the config and path of the said project for a given file path.""" 1047 for config_path, config in self.configs.items(): 1048 try: 1049 path.relative_to(config_path) 1050 return config, config_path 1051 except ValueError: 1052 pass 1053 return self.config, self.path
Returns the config and path of the said project for a given file path.
1061 @property 1062 def models(self) -> MappingProxyType[str, Model]: 1063 """Returns all registered models in this context.""" 1064 return MappingProxyType(self._models)
Returns all registered models in this context.
1066 @property 1067 def metrics(self) -> MappingProxyType[str, Metric]: 1068 """Returns all registered metrics in this context.""" 1069 return MappingProxyType(self._metrics)
Returns all registered metrics in this context.
1071 @property 1072 def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]: 1073 """Returns all registered standalone audits in this context.""" 1074 return MappingProxyType(self._standalone_audits)
Returns all registered standalone audits in this context.
1076 @property 1077 def models_with_tests(self) -> t.Set[str]: 1078 """Returns all models with tests in this context.""" 1079 return self._models_with_tests
Returns all models with tests in this context.
1081 @property 1082 def snapshots(self) -> t.Dict[str, Snapshot]: 1083 """Generates and returns snapshots based on models registered in this context. 1084 1085 If one of the snapshots has been previously stored in the persisted state, the stored 1086 instance will be returned. 1087 """ 1088 return self._snapshots()
Generates and returns snapshots based on models registered in this context.
If one of the snapshots has been previously stored in the persisted state, the stored instance will be returned.
1090 @property 1091 def requirements(self) -> t.Dict[str, str]: 1092 """Returns the Python dependencies of the project loaded in this context.""" 1093 return self._requirements.copy()
Returns the Python dependencies of the project loaded in this context.
1099 @python_api_analytics 1100 def render( 1101 self, 1102 model_or_snapshot: ModelOrSnapshot, 1103 *, 1104 start: t.Optional[TimeLike] = None, 1105 end: t.Optional[TimeLike] = None, 1106 execution_time: t.Optional[TimeLike] = None, 1107 expand: t.Union[bool, t.Iterable[str]] = False, 1108 **kwargs: t.Any, 1109 ) -> exp.Expr: 1110 """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. 1111 1112 Args: 1113 model_or_snapshot: The model, model name, or snapshot to render. 1114 start: The start of the interval to render. 1115 end: The end of the interval to render. 1116 execution_time: The date/time time reference to use for execution time. Defaults to now. 1117 expand: Whether or not to use expand materialized models, defaults to False. 1118 If True, all referenced models are expanded as raw queries. 1119 If a list, only referenced models are expanded as raw queries. 1120 1121 Returns: 1122 The rendered expression. 1123 """ 1124 execution_time = execution_time or now() 1125 1126 model = self.get_model(model_or_snapshot, raise_if_missing=True) 1127 1128 if expand and not isinstance(expand, bool): 1129 expand = { 1130 normalize_model_name( 1131 x, default_catalog=self.default_catalog, dialect=self.default_dialect 1132 ) 1133 for x in expand 1134 } 1135 1136 expand = self.dag.upstream(model.fqn) if expand is True else expand or [] 1137 1138 if model.is_seed: 1139 import pandas as pd 1140 1141 df = next( 1142 model.render( 1143 context=self.execution_context( 1144 engine_adapter=self._get_engine_adapter(model.gateway) 1145 ), 1146 start=start, 1147 end=end, 1148 execution_time=execution_time, 1149 **kwargs, 1150 ) 1151 ) 1152 return next(pandas_to_sql(t.cast(pd.DataFrame, df), model.columns_to_types)) 1153 1154 snapshots = self.snapshots 1155 deployability_index = DeployabilityIndex.create(snapshots.values(), start=start) 1156 1157 return model.render_query_or_raise( 1158 start=start, 1159 end=end, 1160 execution_time=execution_time, 1161 snapshots=snapshots, 1162 expand=expand, 1163 deployability_index=deployability_index, 1164 engine_adapter=self._get_engine_adapter(model.gateway), 1165 **kwargs, 1166 )
Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
Arguments:
- model_or_snapshot: The model, model name, or snapshot to render.
- start: The start of the interval to render.
- end: The end of the interval to render.
- execution_time: The date/time time reference to use for execution time. Defaults to now.
- expand: Whether or not to use expand materialized models, defaults to False. If True, all referenced models are expanded as raw queries. If a list, only referenced models are expanded as raw queries.
Returns:
The rendered expression.
1168 @python_api_analytics 1169 def evaluate( 1170 self, 1171 model_or_snapshot: ModelOrSnapshot, 1172 start: TimeLike, 1173 end: TimeLike, 1174 execution_time: TimeLike, 1175 limit: t.Optional[int] = None, 1176 **kwargs: t.Any, 1177 ) -> DF: 1178 """Evaluate a model or snapshot (running its query against a DB/Engine). 1179 1180 This method is used to test or iterate on models without side effects. 1181 1182 Args: 1183 model_or_snapshot: The model, model name, or snapshot to render. 1184 start: The start of the interval to evaluate. 1185 end: The end of the interval to evaluate. 1186 execution_time: The date/time time reference to use for execution time. 1187 limit: A limit applied to the model. 1188 """ 1189 snapshots = self.snapshots 1190 fqn = self._node_or_snapshot_to_fqn(model_or_snapshot) 1191 if fqn not in snapshots: 1192 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1193 snapshot = snapshots[fqn] 1194 1195 # Expand all uncategorized parents since physical tables don't exist for them yet 1196 expand = [ 1197 parent 1198 for parent in self.dag.upstream(snapshot.model.fqn) 1199 if (parent_snapshot := snapshots.get(parent)) 1200 and parent_snapshot.is_model 1201 and parent_snapshot.model.is_sql 1202 and not parent_snapshot.categorized 1203 ] 1204 1205 df = self.snapshot_evaluator.evaluate_and_fetch( 1206 snapshot, 1207 start=start, 1208 end=end, 1209 execution_time=execution_time, 1210 snapshots=self.snapshots, 1211 limit=limit or c.DEFAULT_MAX_LIMIT, 1212 expand=expand, 1213 ) 1214 1215 if df is None: 1216 raise RuntimeError(f"Error evaluating {snapshot.name}") 1217 1218 return df
Evaluate a model or snapshot (running its query against a DB/Engine).
This method is used to test or iterate on models without side effects.
Arguments:
- model_or_snapshot: The model, model name, or snapshot to render.
- start: The start of the interval to evaluate.
- end: The end of the interval to evaluate.
- execution_time: The date/time time reference to use for execution time.
- limit: A limit applied to the model.
1220 @python_api_analytics 1221 def format( 1222 self, 1223 transpile: t.Optional[str] = None, 1224 rewrite_casts: t.Optional[bool] = None, 1225 append_newline: t.Optional[bool] = None, 1226 *, 1227 check: t.Optional[bool] = None, 1228 paths: t.Optional[t.Tuple[t.Union[str, Path], ...]] = None, 1229 **kwargs: t.Any, 1230 ) -> bool: 1231 """Format all SQL models and audits.""" 1232 filtered_targets = [ 1233 target 1234 for target in chain(self._models.values(), self._audits.values()) 1235 if target._path is not None 1236 and target._path.suffix == ".sql" 1237 and (not paths or any(target._path.samefile(p) for p in paths)) 1238 ] 1239 unformatted_file_paths = [] 1240 1241 for target in filtered_targets: 1242 if ( 1243 target._path is None or target.formatting is False 1244 ): # introduced to satisfy type checker as still want to pull filter out as many targets as possible before loop 1245 continue 1246 1247 with open(target._path, "r+", encoding="utf-8") as file: 1248 before = file.read() 1249 1250 after = self._format( 1251 target, 1252 before, 1253 transpile=transpile, 1254 rewrite_casts=rewrite_casts, 1255 append_newline=append_newline, 1256 **kwargs, 1257 ) 1258 1259 if not check: 1260 file.seek(0) 1261 file.write(after) 1262 file.truncate() 1263 elif before != after: 1264 unformatted_file_paths.append(target._path) 1265 1266 if unformatted_file_paths: 1267 for path in unformatted_file_paths: 1268 self.console.log_status_update(f"{path} needs reformatting.") 1269 self.console.log_status_update( 1270 f"\n{len(unformatted_file_paths)} file(s) need reformatting." 1271 ) 1272 return False 1273 1274 return True
Format all SQL models and audits.
1314 @python_api_analytics 1315 def plan( 1316 self, 1317 environment: t.Optional[str] = None, 1318 *, 1319 start: t.Optional[TimeLike] = None, 1320 end: t.Optional[TimeLike] = None, 1321 execution_time: t.Optional[TimeLike] = None, 1322 create_from: t.Optional[str] = None, 1323 skip_tests: t.Optional[bool] = None, 1324 restate_models: t.Optional[t.Iterable[str]] = None, 1325 no_gaps: t.Optional[bool] = None, 1326 skip_backfill: t.Optional[bool] = None, 1327 empty_backfill: t.Optional[bool] = None, 1328 forward_only: t.Optional[bool] = None, 1329 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1330 allow_additive_models: t.Optional[t.Collection[str]] = None, 1331 no_prompts: t.Optional[bool] = None, 1332 auto_apply: t.Optional[bool] = None, 1333 no_auto_categorization: t.Optional[bool] = None, 1334 effective_from: t.Optional[TimeLike] = None, 1335 include_unmodified: t.Optional[bool] = None, 1336 select_models: t.Optional[t.Collection[str]] = None, 1337 backfill_models: t.Optional[t.Collection[str]] = None, 1338 categorizer_config: t.Optional[CategorizerConfig] = None, 1339 enable_preview: t.Optional[bool] = None, 1340 no_diff: t.Optional[bool] = None, 1341 run: t.Optional[bool] = None, 1342 diff_rendered: t.Optional[bool] = None, 1343 skip_linter: t.Optional[bool] = None, 1344 explain: t.Optional[bool] = None, 1345 ignore_cron: t.Optional[bool] = None, 1346 min_intervals: t.Optional[int] = None, 1347 ) -> Plan: 1348 """Interactively creates a plan. 1349 1350 This method compares the current context with the target environment. It then presents 1351 the differences and asks whether to backfill each modified model. 1352 1353 Args: 1354 environment: The environment to diff and plan against. 1355 start: The start date of the backfill if there is one. 1356 end: The end date of the backfill if there is one. 1357 execution_time: The date/time reference to use for execution time. Defaults to now. 1358 create_from: The environment to create the target environment from if it 1359 doesn't exist. If not specified, the "prod" environment will be used. 1360 skip_tests: Unit tests are run by default so this will skip them if enabled 1361 restate_models: A list of either internal or external models, or tags, that need to be restated 1362 for the given plan interval. If the target environment is a production environment, 1363 ALL snapshots that depended on these upstream tables will have their intervals deleted 1364 (even ones not in this current environment). Only the snapshots in this environment will 1365 be backfilled whereas others need to be recovered on a future plan application. For development 1366 environments only snapshots that are part of this plan will be affected. 1367 no_gaps: Whether to ensure that new snapshots for models that are already a 1368 part of the target environment have no data gaps when compared against previous 1369 snapshots for same models. 1370 skip_backfill: Whether to skip the backfill step. Default: False. 1371 empty_backfill: Like skip_backfill, but also records processed intervals. 1372 forward_only: Whether the purpose of the plan is to make forward only changes. 1373 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1374 allow_additive_models: Models whose forward-only changes are allowed to be additive. 1375 no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that 1376 if this flag is set to true and there are uncategorized changes the plan creation will 1377 fail. Default: False. 1378 auto_apply: Whether to automatically apply the new plan after creation. Default: False. 1379 no_auto_categorization: Indicates whether to disable automatic categorization of model 1380 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1381 option determines the behavior. 1382 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1383 project config by default. 1384 effective_from: The effective date from which to apply forward-only changes on production. 1385 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1386 select_models: A list of model selection strings to filter the models that should be included into this plan. 1387 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1388 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1389 no_diff: Hide text differences for changed models. 1390 run: Whether to run latest intervals as part of the plan application. 1391 diff_rendered: Whether the diff should compare raw vs rendered models 1392 skip_linter: Linter runs by default so this will skip it if enabled 1393 explain: Whether to explain the plan instead of applying it. 1394 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1395 on every model when checking for missing intervals 1396 1397 Returns: 1398 The populated Plan object. 1399 """ 1400 plan_builder = self.plan_builder( 1401 environment, 1402 start=start, 1403 end=end, 1404 execution_time=execution_time, 1405 create_from=create_from, 1406 skip_tests=skip_tests, 1407 restate_models=restate_models, 1408 no_gaps=no_gaps, 1409 skip_backfill=skip_backfill, 1410 empty_backfill=empty_backfill, 1411 forward_only=forward_only, 1412 allow_destructive_models=allow_destructive_models, 1413 allow_additive_models=allow_additive_models, 1414 no_auto_categorization=no_auto_categorization, 1415 effective_from=effective_from, 1416 include_unmodified=include_unmodified, 1417 select_models=select_models, 1418 backfill_models=backfill_models, 1419 categorizer_config=categorizer_config, 1420 enable_preview=enable_preview, 1421 run=run, 1422 diff_rendered=diff_rendered, 1423 skip_linter=skip_linter, 1424 explain=explain, 1425 ignore_cron=ignore_cron, 1426 min_intervals=min_intervals, 1427 ) 1428 1429 plan = plan_builder.build() 1430 1431 if no_auto_categorization or plan.uncategorized: 1432 # Prompts are required if the auto categorization is disabled 1433 # or if there are any uncategorized snapshots in the plan 1434 no_prompts = False 1435 1436 if explain: 1437 auto_apply = True 1438 1439 self.console.plan( 1440 plan_builder, 1441 auto_apply if auto_apply is not None else self.config.plan.auto_apply, 1442 self.default_catalog, 1443 no_diff=no_diff if no_diff is not None else self.config.plan.no_diff, 1444 no_prompts=no_prompts if no_prompts is not None else self.config.plan.no_prompts, 1445 ) 1446 1447 return plan
Interactively creates a plan.
This method compares the current context with the target environment. It then presents the differences and asks whether to backfill each modified model.
Arguments:
- environment: The environment to diff and plan against.
- start: The start date of the backfill if there is one.
- end: The end date of the backfill if there is one.
- execution_time: The date/time reference to use for execution time. Defaults to now.
- create_from: The environment to create the target environment from if it doesn't exist. If not specified, the "prod" environment will be used.
- skip_tests: Unit tests are run by default so this will skip them if enabled
- restate_models: A list of either internal or external models, or tags, that need to be restated for the given plan interval. If the target environment is a production environment, ALL snapshots that depended on these upstream tables will have their intervals deleted (even ones not in this current environment). Only the snapshots in this environment will be backfilled whereas others need to be recovered on a future plan application. For development environments only snapshots that are part of this plan will be affected.
- no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for same models.
- skip_backfill: Whether to skip the backfill step. Default: False.
- empty_backfill: Like skip_backfill, but also records processed intervals.
- forward_only: Whether the purpose of the plan is to make forward only changes.
- allow_destructive_models: Models whose forward-only changes are allowed to be destructive.
- allow_additive_models: Models whose forward-only changes are allowed to be additive.
- no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that if this flag is set to true and there are uncategorized changes the plan creation will fail. Default: False.
- auto_apply: Whether to automatically apply the new plan after creation. Default: False.
- no_auto_categorization: Indicates whether to disable automatic categorization of model changes (breaking / non-breaking). If not provided, then the corresponding configuration option determines the behavior.
- categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the project config by default.
- effective_from: The effective date from which to apply forward-only changes on production.
- include_unmodified: Indicates whether to include unmodified models in the target development environment.
- select_models: A list of model selection strings to filter the models that should be included into this plan.
- backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
- enable_preview: Indicates whether to enable preview for forward-only models in development environments.
- no_diff: Hide text differences for changed models.
- run: Whether to run latest intervals as part of the plan application.
- diff_rendered: Whether the diff should compare raw vs rendered models
- skip_linter: Linter runs by default so this will skip it if enabled
- explain: Whether to explain the plan instead of applying it.
- min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered on every model when checking for missing intervals
Returns:
The populated Plan object.
1449 @python_api_analytics 1450 def plan_builder( 1451 self, 1452 environment: t.Optional[str] = None, 1453 *, 1454 start: t.Optional[TimeLike] = None, 1455 end: t.Optional[TimeLike] = None, 1456 execution_time: t.Optional[TimeLike] = None, 1457 create_from: t.Optional[str] = None, 1458 skip_tests: t.Optional[bool] = None, 1459 restate_models: t.Optional[t.Iterable[str]] = None, 1460 no_gaps: t.Optional[bool] = None, 1461 skip_backfill: t.Optional[bool] = None, 1462 empty_backfill: t.Optional[bool] = None, 1463 forward_only: t.Optional[bool] = None, 1464 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1465 allow_additive_models: t.Optional[t.Collection[str]] = None, 1466 no_auto_categorization: t.Optional[bool] = None, 1467 effective_from: t.Optional[TimeLike] = None, 1468 include_unmodified: t.Optional[bool] = None, 1469 select_models: t.Optional[t.Collection[str]] = None, 1470 backfill_models: t.Optional[t.Collection[str]] = None, 1471 categorizer_config: t.Optional[CategorizerConfig] = None, 1472 enable_preview: t.Optional[bool] = None, 1473 run: t.Optional[bool] = None, 1474 diff_rendered: t.Optional[bool] = None, 1475 skip_linter: t.Optional[bool] = None, 1476 explain: t.Optional[bool] = None, 1477 ignore_cron: t.Optional[bool] = None, 1478 min_intervals: t.Optional[int] = None, 1479 always_include_local_changes: t.Optional[bool] = None, 1480 ) -> PlanBuilder: 1481 """Creates a plan builder. 1482 1483 Args: 1484 environment: The environment to diff and plan against. 1485 start: The start date of the backfill if there is one. 1486 end: The end date of the backfill if there is one. 1487 execution_time: The date/time reference to use for execution time. Defaults to now. 1488 create_from: The environment to create the target environment from if it 1489 doesn't exist. If not specified, the "prod" environment will be used. 1490 skip_tests: Unit tests are run by default so this will skip them if enabled 1491 restate_models: A list of either internal or external models, or tags, that need to be restated 1492 for the given plan interval. If the target environment is a production environment, 1493 ALL snapshots that depended on these upstream tables will have their intervals deleted 1494 (even ones not in this current environment). Only the snapshots in this environment will 1495 be backfilled whereas others need to be recovered on a future plan application. For development 1496 environments only snapshots that are part of this plan will be affected. 1497 no_gaps: Whether to ensure that new snapshots for models that are already a 1498 part of the target environment have no data gaps when compared against previous 1499 snapshots for same models. 1500 skip_backfill: Whether to skip the backfill step. Default: False. 1501 empty_backfill: Like skip_backfill, but also records processed intervals. 1502 forward_only: Whether the purpose of the plan is to make forward only changes. 1503 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1504 no_auto_categorization: Indicates whether to disable automatic categorization of model 1505 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1506 option determines the behavior. 1507 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1508 project config by default. 1509 effective_from: The effective date from which to apply forward-only changes on production. 1510 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1511 select_models: A list of model selection strings to filter the models that should be included into this plan. 1512 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1513 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1514 run: Whether to run latest intervals as part of the plan application. 1515 diff_rendered: Whether the diff should compare raw vs rendered models 1516 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1517 on every model when checking for missing intervals 1518 always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. 1519 However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour. 1520 1521 Returns: 1522 The plan builder. 1523 """ 1524 kwargs: t.Dict[str, t.Optional[UserProvidedFlags]] = { 1525 "start": start, 1526 "end": end, 1527 "execution_time": execution_time, 1528 "create_from": create_from, 1529 "skip_tests": skip_tests, 1530 "restate_models": list(restate_models) if restate_models is not None else None, 1531 "no_gaps": no_gaps, 1532 "skip_backfill": skip_backfill, 1533 "empty_backfill": empty_backfill, 1534 "forward_only": forward_only, 1535 "allow_destructive_models": list(allow_destructive_models) 1536 if allow_destructive_models is not None 1537 else None, 1538 "allow_additive_models": list(allow_additive_models) 1539 if allow_additive_models is not None 1540 else None, 1541 "no_auto_categorization": no_auto_categorization, 1542 "effective_from": effective_from, 1543 "include_unmodified": include_unmodified, 1544 "select_models": list(select_models) if select_models is not None else None, 1545 "backfill_models": list(backfill_models) if backfill_models is not None else None, 1546 "enable_preview": enable_preview, 1547 "run": run, 1548 "diff_rendered": diff_rendered, 1549 "skip_linter": skip_linter, 1550 "min_intervals": min_intervals, 1551 } 1552 user_provided_flags: t.Dict[str, UserProvidedFlags] = { 1553 k: v for k, v in kwargs.items() if v is not None 1554 } 1555 1556 skip_tests = explain or skip_tests or False 1557 no_gaps = no_gaps or False 1558 skip_backfill = skip_backfill or False 1559 empty_backfill = empty_backfill or False 1560 run = run or False 1561 diff_rendered = diff_rendered or False 1562 skip_linter = skip_linter or False 1563 min_intervals = min_intervals or 0 1564 1565 environment = environment or self.config.default_target_environment 1566 environment = Environment.sanitize_name(environment) 1567 is_dev = environment != c.PROD 1568 1569 if include_unmodified is None: 1570 include_unmodified = self.config.plan.include_unmodified 1571 1572 if skip_backfill and not no_gaps and not is_dev: 1573 # note: we deliberately don't mention the --no-gaps flag in case the plan came from the sqlmesh_dbt command 1574 # todo: perhaps we could have better error messages if we check sys.argv[0] for which cli is running? 1575 self.console.log_warning( 1576 "Skipping the backfill stage for production can lead to unexpected results, such as tables being empty or incremental data with non-contiguous time ranges being made available.\n" 1577 "If you are doing this deliberately to create an empty version of a table to test a change, please consider using Virtual Data Environments instead." 1578 ) 1579 1580 if not skip_linter: 1581 self.lint_models() 1582 1583 self._run_plan_tests(skip_tests=skip_tests) 1584 1585 environment_ttl = ( 1586 self.environment_ttl if environment not in self.pinned_environments else None 1587 ) 1588 1589 model_selector = self._new_selector() 1590 1591 if allow_destructive_models: 1592 expanded_destructive_models = model_selector.expand_model_selections( 1593 allow_destructive_models 1594 ) 1595 else: 1596 expanded_destructive_models = None 1597 1598 if allow_additive_models: 1599 expanded_additive_models = model_selector.expand_model_selections(allow_additive_models) 1600 else: 1601 expanded_additive_models = None 1602 1603 if backfill_models: 1604 backfill_models = model_selector.expand_model_selections(backfill_models) 1605 else: 1606 backfill_models = None 1607 1608 models_override: t.Optional[UniqueKeyDict[str, Model]] = None 1609 if select_models: 1610 try: 1611 models_override = model_selector.select_models( 1612 select_models, 1613 environment, 1614 fallback_env_name=create_from or c.PROD, 1615 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1616 ) 1617 except SQLMeshError as e: 1618 logger.exception(e) # ensure the full stack trace is logged 1619 raise PlanError( 1620 f"{e}\nCheck the SQLMesh log file for the full stack trace.\nIf the model has been fixed locally, please ensure that the --select-model expression includes it." 1621 ) 1622 if not backfill_models: 1623 # Only backfill selected models unless explicitly specified. 1624 backfill_models = model_selector.expand_model_selections(select_models) 1625 1626 expanded_restate_models = None 1627 if restate_models is not None: 1628 expanded_restate_models = model_selector.expand_model_selections(restate_models) 1629 1630 if (restate_models is not None and not expanded_restate_models) or ( 1631 backfill_models is not None and not backfill_models 1632 ): 1633 raise PlanError( 1634 "Selector did not return any models. Please check your model selection and try again." 1635 ) 1636 1637 if always_include_local_changes is None: 1638 # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes 1639 force_no_diff = restate_models is not None or ( 1640 backfill_models is not None and not backfill_models 1641 ) 1642 else: 1643 force_no_diff = not always_include_local_changes 1644 1645 snapshots = self._snapshots(models_override) 1646 context_diff = self._context_diff( 1647 environment or c.PROD, 1648 snapshots=snapshots, 1649 create_from=create_from, 1650 force_no_diff=force_no_diff, 1651 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1652 diff_rendered=diff_rendered, 1653 always_recreate_environment=self.config.plan.always_recreate_environment, 1654 ) 1655 modified_model_names = { 1656 *context_diff.modified_snapshots, 1657 *[s.name for s in context_diff.added], 1658 } 1659 1660 if ( 1661 is_dev 1662 and not include_unmodified 1663 and backfill_models is None 1664 and expanded_restate_models is None 1665 ): 1666 # Only backfill modified and added models. 1667 # This ensures that no models outside the impacted sub-DAG(s) will be backfilled unexpectedly. 1668 backfill_models = modified_model_names or None 1669 1670 max_interval_end_per_model = None 1671 default_start, default_end = None, None 1672 if not run: 1673 ignore_cron = False 1674 max_interval_end_per_model = self._get_max_interval_end_per_model( 1675 snapshots, backfill_models 1676 ) 1677 # If no end date is specified, use the max interval end from prod 1678 # to prevent unintended evaluation of the entire DAG. 1679 default_start, default_end = self._get_plan_default_start_end( 1680 snapshots, 1681 max_interval_end_per_model, 1682 backfill_models, 1683 modified_model_names, 1684 execution_time or now(), 1685 ) 1686 1687 # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model. 1688 self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values()) 1689 1690 start_override_per_model = self._calculate_start_override_per_model( 1691 min_intervals, 1692 start or default_start, 1693 end or default_end, 1694 execution_time or now(), 1695 backfill_models, 1696 snapshots, 1697 max_interval_end_per_model, 1698 ) 1699 1700 if not self.config.virtual_environment_mode.is_full: 1701 forward_only = True 1702 elif forward_only is None: 1703 forward_only = self.config.plan.forward_only 1704 1705 # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments 1706 # If we are not, then there is no point, because none of the data in dev environments can be promoted by definition 1707 restate_all_snapshots = ( 1708 expanded_restate_models is not None 1709 and not is_dev 1710 and self.config.virtual_environment_mode.is_full 1711 ) 1712 1713 return self.PLAN_BUILDER_TYPE( 1714 context_diff=context_diff, 1715 start=start, 1716 end=end, 1717 execution_time=execution_time, 1718 apply=self.apply, 1719 restate_models=expanded_restate_models, 1720 restate_all_snapshots=restate_all_snapshots, 1721 backfill_models=backfill_models, 1722 no_gaps=no_gaps, 1723 skip_backfill=skip_backfill, 1724 empty_backfill=empty_backfill, 1725 is_dev=is_dev, 1726 forward_only=forward_only, 1727 allow_destructive_models=expanded_destructive_models, 1728 allow_additive_models=expanded_additive_models, 1729 environment_ttl=environment_ttl, 1730 environment_suffix_target=self.config.environment_suffix_target, 1731 environment_catalog_mapping=self.environment_catalog_mapping, 1732 categorizer_config=categorizer_config or self.auto_categorize_changes, 1733 auto_categorization_enabled=not no_auto_categorization, 1734 effective_from=effective_from, 1735 include_unmodified=include_unmodified, 1736 default_start=default_start, 1737 default_end=default_end, 1738 enable_preview=( 1739 enable_preview if enable_preview is not None else self._plan_preview_enabled 1740 ), 1741 end_bounded=not run, 1742 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1743 start_override_per_model=start_override_per_model, 1744 end_override_per_model=max_interval_end_per_model, 1745 console=self.console, 1746 user_provided_flags=user_provided_flags, 1747 selected_models={ 1748 dbt_unique_id 1749 for model in model_selector.expand_model_selections(select_models or "*") 1750 if (dbt_unique_id := snapshots[model].node.dbt_unique_id) 1751 }, 1752 explain=explain or False, 1753 ignore_cron=ignore_cron or False, 1754 )
Creates a plan builder.
Arguments:
- environment: The environment to diff and plan against.
- start: The start date of the backfill if there is one.
- end: The end date of the backfill if there is one.
- execution_time: The date/time reference to use for execution time. Defaults to now.
- create_from: The environment to create the target environment from if it doesn't exist. If not specified, the "prod" environment will be used.
- skip_tests: Unit tests are run by default so this will skip them if enabled
- restate_models: A list of either internal or external models, or tags, that need to be restated for the given plan interval. If the target environment is a production environment, ALL snapshots that depended on these upstream tables will have their intervals deleted (even ones not in this current environment). Only the snapshots in this environment will be backfilled whereas others need to be recovered on a future plan application. For development environments only snapshots that are part of this plan will be affected.
- no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for same models.
- skip_backfill: Whether to skip the backfill step. Default: False.
- empty_backfill: Like skip_backfill, but also records processed intervals.
- forward_only: Whether the purpose of the plan is to make forward only changes.
- allow_destructive_models: Models whose forward-only changes are allowed to be destructive.
- no_auto_categorization: Indicates whether to disable automatic categorization of model changes (breaking / non-breaking). If not provided, then the corresponding configuration option determines the behavior.
- categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the project config by default.
- effective_from: The effective date from which to apply forward-only changes on production.
- include_unmodified: Indicates whether to include unmodified models in the target development environment.
- select_models: A list of model selection strings to filter the models that should be included into this plan.
- backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
- enable_preview: Indicates whether to enable preview for forward-only models in development environments.
- run: Whether to run latest intervals as part of the plan application.
- diff_rendered: Whether the diff should compare raw vs rendered models
- min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered on every model when checking for missing intervals
- always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour.
Returns:
The plan builder.
1756 def apply( 1757 self, 1758 plan: Plan, 1759 circuit_breaker: t.Optional[t.Callable[[], bool]] = None, 1760 ) -> None: 1761 """Applies a plan by pushing snapshots and backfilling data. 1762 1763 Given a plan, it pushes snapshots into the state sync and then uses the scheduler 1764 to backfill all models. 1765 1766 Args: 1767 plan: The plan to apply. 1768 circuit_breaker: An optional handler which checks if the apply should be aborted. 1769 """ 1770 if ( 1771 not plan.context_diff.has_changes 1772 and not plan.requires_backfill 1773 and not plan.has_unmodified_unpromoted 1774 ): 1775 return 1776 if plan.uncategorized: 1777 raise UncategorizedPlanError("Can't apply a plan with uncategorized changes.") 1778 1779 if plan.explain: 1780 explainer = PlanExplainer( 1781 state_reader=self.state_reader, 1782 default_catalog=self.default_catalog, 1783 console=self.console, 1784 ) 1785 explainer.evaluate(plan.to_evaluatable()) 1786 return 1787 1788 self.notification_target_manager.notify( 1789 NotificationEvent.APPLY_START, 1790 environment=plan.environment_naming_info.name, 1791 plan_id=plan.plan_id, 1792 ) 1793 try: 1794 self._apply(plan, circuit_breaker) 1795 except Exception as e: 1796 self.notification_target_manager.notify( 1797 NotificationEvent.APPLY_FAILURE, 1798 environment=plan.environment_naming_info.name, 1799 plan_id=plan.plan_id, 1800 exc=traceback.format_exc(), 1801 ) 1802 logger.info("Plan application failed.", exc_info=e) 1803 raise e 1804 self.notification_target_manager.notify( 1805 NotificationEvent.APPLY_END, 1806 environment=plan.environment_naming_info.name, 1807 plan_id=plan.plan_id, 1808 )
Applies a plan by pushing snapshots and backfilling data.
Given a plan, it pushes snapshots into the state sync and then uses the scheduler to backfill all models.
Arguments:
- plan: The plan to apply.
- circuit_breaker: An optional handler which checks if the apply should be aborted.
1810 @python_api_analytics 1811 def invalidate_environment(self, name: str, sync: bool = False) -> None: 1812 """Invalidates the target environment by setting its expiration timestamp to now. 1813 1814 Args: 1815 name: The name of the environment to invalidate. 1816 sync: If True, the call blocks until the environment is deleted. Otherwise, the environment will 1817 be deleted asynchronously by the janitor process. 1818 """ 1819 name = Environment.sanitize_name(name) 1820 self.state_sync.invalidate_environment(name) 1821 if sync: 1822 self._cleanup_environments() 1823 self.console.log_success(f"Environment '{name}' deleted.") 1824 else: 1825 self.console.log_success(f"Environment '{name}' invalidated.")
Invalidates the target environment by setting its expiration timestamp to now.
Arguments:
- name: The name of the environment to invalidate.
- sync: If True, the call blocks until the environment is deleted. Otherwise, the environment will be deleted asynchronously by the janitor process.
1827 @python_api_analytics 1828 def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> bool: 1829 """Show a diff of the current context with a given environment. 1830 1831 Args: 1832 environment: The environment to diff against. 1833 detailed: Show the actual SQL differences if True. 1834 1835 Returns: 1836 True if there are changes, False otherwise. 1837 """ 1838 environment = environment or self.config.default_target_environment 1839 environment = Environment.sanitize_name(environment) 1840 context_diff = self._context_diff(environment) 1841 self.console.show_environment_difference_summary( 1842 context_diff, 1843 no_diff=not detailed, 1844 ) 1845 if context_diff.has_changes: 1846 self.console.show_model_difference_summary( 1847 context_diff, 1848 EnvironmentNamingInfo.from_environment_catalog_mapping( 1849 self.environment_catalog_mapping, 1850 name=environment, 1851 suffix_target=self.config.environment_suffix_target, 1852 normalize_name=context_diff.normalize_environment_name, 1853 ), 1854 self.default_catalog, 1855 no_diff=not detailed, 1856 ) 1857 return context_diff.has_changes
Show a diff of the current context with a given environment.
Arguments:
- environment: The environment to diff against.
- detailed: Show the actual SQL differences if True.
Returns:
True if there are changes, False otherwise.
1859 @python_api_analytics 1860 def table_diff( 1861 self, 1862 source: str, 1863 target: str, 1864 on: t.Optional[t.List[str] | exp.Expr] = None, 1865 skip_columns: t.Optional[t.List[str]] = None, 1866 select_models: t.Optional[t.Collection[str]] = None, 1867 where: t.Optional[str | exp.Expr] = None, 1868 limit: int = 20, 1869 show: bool = True, 1870 show_sample: bool = True, 1871 decimals: int = 3, 1872 skip_grain_check: bool = False, 1873 warn_grain_check: bool = False, 1874 temp_schema: t.Optional[str] = None, 1875 schema_diff_ignore_case: bool = False, 1876 **kwargs: t.Any, # catch-all to prevent an 'unexpected keyword argument' error if an table_diff extension passes in some extra arguments 1877 ) -> t.List[TableDiff]: 1878 """Show a diff between two tables. 1879 1880 Args: 1881 source: The source environment or table. 1882 target: The target environment or table. 1883 on: The join condition, table aliases must be "s" and "t" for source and target. 1884 If omitted, the table's grain will be used. 1885 skip_columns: The columns to skip when computing the table diff. 1886 select_models: The models or snapshots to use when environments are passed in. 1887 where: An optional where statement to filter results. 1888 limit: The limit of the sample dataframe. 1889 show: Show the table diff output in the console. 1890 show_sample: Show the sample dataframe in the console. Requires show=True. 1891 decimals: The number of decimal places to keep when comparing floating point columns. 1892 skip_grain_check: Skip check for rows that contain null or duplicate grains. 1893 temp_schema: The schema to use for temporary tables. 1894 1895 Returns: 1896 The list of TableDiff objects containing schema and summary differences. 1897 """ 1898 1899 if "|" in source or "|" in target: 1900 raise ConfigError( 1901 "Cross-database table diffing is available in Tobiko Cloud. Read more here: " 1902 "https://sqlmesh.readthedocs.io/en/stable/guides/tablediff/#diffing-tables-or-views-across-gateways" 1903 ) 1904 1905 table_diffs: t.List[TableDiff] = [] 1906 1907 # Diffs multiple or a single model across two environments 1908 if select_models: 1909 source_env = self.state_reader.get_environment(source) 1910 target_env = self.state_reader.get_environment(target) 1911 if not source_env: 1912 raise SQLMeshError(f"Could not find environment '{source}'") 1913 if not target_env: 1914 raise SQLMeshError(f"Could not find environment '{target}'") 1915 criteria = ", ".join(f"'{c}'" for c in select_models) 1916 try: 1917 selected_models = self._new_selector().expand_model_selections(select_models) 1918 if not selected_models: 1919 self.console.log_status_update( 1920 f"No models matched the selection criteria: {criteria}" 1921 ) 1922 except Exception as e: 1923 raise SQLMeshError(e) 1924 1925 models_to_diff: t.List[ 1926 t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Expr]] 1927 ] = [] 1928 models_without_grain: t.List[Model] = [] 1929 source_snapshots_to_name = { 1930 snapshot.name: snapshot for snapshot in source_env.snapshots 1931 } 1932 target_snapshots_to_name = { 1933 snapshot.name: snapshot for snapshot in target_env.snapshots 1934 } 1935 1936 for model_fqn in selected_models: 1937 model = self._models[model_fqn] 1938 adapter = self._get_engine_adapter(model.gateway) 1939 source_snapshot = source_snapshots_to_name.get(model.fqn) 1940 target_snapshot = target_snapshots_to_name.get(model.fqn) 1941 1942 if target_snapshot and source_snapshot: 1943 if (source_snapshot.fingerprint != target_snapshot.fingerprint) and ( 1944 (source_snapshot.version != target_snapshot.version) 1945 or source_snapshot.is_forward_only 1946 ): 1947 # Compare the virtual layer instead of the physical layer because the virtual layer is guaranteed to point 1948 # to the correct/active snapshot for the model in the specified environment, taking into account things like dev previews 1949 source = source_snapshot.qualified_view_name.for_environment( 1950 source_env.naming_info, adapter.dialect 1951 ) 1952 target = target_snapshot.qualified_view_name.for_environment( 1953 target_env.naming_info, adapter.dialect 1954 ) 1955 model_on = on or model.on 1956 if not model_on: 1957 models_without_grain.append(model) 1958 else: 1959 models_to_diff.append((model, adapter, source, target, model_on)) 1960 1961 if models_without_grain: 1962 model_names = "\n".join( 1963 f"─ {model.name} \n at '{model._path}'" for model in models_without_grain 1964 ) 1965 message = ( 1966 "SQLMesh doesn't know how to join the tables for the following models:\n" 1967 f"{model_names}\n\n" 1968 "Please specify a `grain` in each model definition. It must be unique and not null." 1969 ) 1970 if warn_grain_check: 1971 self.console.log_warning(message) 1972 else: 1973 raise SQLMeshError(message) 1974 1975 if models_to_diff: 1976 self.console.show_table_diff_details( 1977 [model[0].name for model in models_to_diff], 1978 ) 1979 1980 self.console.start_table_diff_progress(len(models_to_diff)) 1981 try: 1982 tasks_num = min(len(models_to_diff), self.concurrent_tasks) 1983 table_diffs = concurrent_apply_to_values( 1984 list(models_to_diff), 1985 lambda model_info: self._model_diff( 1986 model=model_info[0], 1987 adapter=model_info[1], 1988 source=model_info[2], 1989 target=model_info[3], 1990 on=model_info[4], 1991 source_alias=source_env.name, 1992 target_alias=target_env.name, 1993 limit=limit, 1994 decimals=decimals, 1995 skip_columns=skip_columns, 1996 where=where, 1997 show=show, 1998 temp_schema=temp_schema, 1999 skip_grain_check=skip_grain_check, 2000 schema_diff_ignore_case=schema_diff_ignore_case, 2001 ), 2002 tasks_num=tasks_num, 2003 ) 2004 self.console.stop_table_diff_progress(success=True) 2005 except: 2006 self.console.stop_table_diff_progress(success=False) 2007 raise 2008 elif selected_models: 2009 self.console.log_status_update( 2010 f"No models contain differences with the selection criteria: {criteria}" 2011 ) 2012 2013 else: 2014 table_diffs = [ 2015 self._table_diff( 2016 source=source, 2017 target=target, 2018 source_alias=source, 2019 target_alias=target, 2020 limit=limit, 2021 decimals=decimals, 2022 adapter=self.engine_adapter, 2023 on=on, 2024 skip_columns=skip_columns, 2025 where=where, 2026 schema_diff_ignore_case=schema_diff_ignore_case, 2027 ) 2028 ] 2029 2030 if show: 2031 self.console.show_table_diff(table_diffs, show_sample, skip_grain_check, temp_schema) 2032 2033 return table_diffs
Show a diff between two tables.
Arguments:
- source: The source environment or table.
- target: The target environment or table.
- on: The join condition, table aliases must be "s" and "t" for source and target. If omitted, the table's grain will be used.
- skip_columns: The columns to skip when computing the table diff.
- select_models: The models or snapshots to use when environments are passed in.
- where: An optional where statement to filter results.
- limit: The limit of the sample dataframe.
- show: Show the table diff output in the console.
- show_sample: Show the sample dataframe in the console. Requires show=True.
- decimals: The number of decimal places to keep when comparing floating point columns.
- skip_grain_check: Skip check for rows that contain null or duplicate grains.
- temp_schema: The schema to use for temporary tables.
Returns:
The list of TableDiff objects containing schema and summary differences.
2114 @python_api_analytics 2115 def get_dag( 2116 self, select_models: t.Optional[t.Collection[str]] = None, **options: t.Any 2117 ) -> GraphHTML: 2118 """Gets an HTML object representation of the DAG. 2119 2120 Args: 2121 select_models: A list of model selection strings that should be included in the dag. 2122 Returns: 2123 An html object that renders the dag. 2124 """ 2125 dag = ( 2126 self.dag.prune(*self._new_selector().expand_model_selections(select_models)) 2127 if select_models 2128 else self.dag 2129 ) 2130 2131 nodes = {} 2132 edges: t.List[t.Dict] = [] 2133 2134 for node, deps in dag.graph.items(): 2135 nodes[node] = { 2136 "id": node, 2137 "label": node.split(".")[-1], 2138 "title": f"<span>{node}</span>", 2139 } 2140 edges.extend({"from": d, "to": node} for d in deps) 2141 2142 return GraphHTML( 2143 nodes, 2144 edges, 2145 options={ 2146 "height": "100%", 2147 "width": "100%", 2148 "interaction": {}, 2149 "layout": { 2150 "hierarchical": { 2151 "enabled": True, 2152 "nodeSpacing": 200, 2153 "sortMethod": "directed", 2154 }, 2155 }, 2156 "nodes": { 2157 "shape": "box", 2158 }, 2159 **options, 2160 }, 2161 )
Gets an HTML object representation of the DAG.
Arguments:
- select_models: A list of model selection strings that should be included in the dag.
Returns:
An html object that renders the dag.
2163 @python_api_analytics 2164 def render_dag(self, path: str, select_models: t.Optional[t.Collection[str]] = None) -> None: 2165 """Render the dag as HTML and save it to a file. 2166 2167 Args: 2168 path: filename to save the dag html to 2169 select_models: A list of model selection strings that should be included in the dag. 2170 """ 2171 file_path = Path(path) 2172 suffix = file_path.suffix 2173 if suffix != ".html": 2174 if suffix: 2175 get_console().log_warning( 2176 f"The extension {suffix} does not designate an html file. A file with a `.html` extension will be created instead." 2177 ) 2178 path = str(file_path.with_suffix(".html")) 2179 2180 with open(path, "w", encoding="utf-8") as file: 2181 file.write(str(self.get_dag(select_models)))
Render the dag as HTML and save it to a file.
Arguments:
- path: filename to save the dag html to
- select_models: A list of model selection strings that should be included in the dag.
2183 @python_api_analytics 2184 def create_test( 2185 self, 2186 model: str, 2187 input_queries: t.Dict[str, str], 2188 overwrite: bool = False, 2189 variables: t.Optional[t.Dict[str, str]] = None, 2190 path: t.Optional[str] = None, 2191 name: t.Optional[str] = None, 2192 include_ctes: bool = False, 2193 ) -> None: 2194 """Generate a unit test fixture for a given model. 2195 2196 Args: 2197 model: The model to test. 2198 input_queries: Mapping of model names to queries. Each model included in this mapping 2199 will be populated in the test based on the results of the corresponding query. 2200 overwrite: Whether to overwrite the existing test in case of a file path collision. 2201 When set to False, an error will be raised if there is such a collision. 2202 variables: Key-value pairs that will define variables needed by the model. 2203 path: The file path corresponding to the fixture, relative to the test directory. 2204 By default, the fixture will be created under the test directory and the file name 2205 will be inferred from the test's name. 2206 name: The name of the test. This is inferred from the model name by default. 2207 include_ctes: When true, CTE fixtures will also be generated. 2208 """ 2209 input_queries = { 2210 # The get_model here has two purposes: return normalized names & check for missing deps 2211 self.get_model(dep, raise_if_missing=True).fqn: query 2212 for dep, query in input_queries.items() 2213 } 2214 2215 try: 2216 model_to_test = self.get_model(model, raise_if_missing=True) 2217 test_adapter = self.test_connection_config.create_engine_adapter( 2218 register_comments_override=False 2219 ) 2220 2221 generate_test( 2222 model=model_to_test, 2223 input_queries=input_queries, 2224 models=self._models, 2225 engine_adapter=self._get_engine_adapter(model_to_test.gateway), 2226 test_engine_adapter=test_adapter, 2227 project_path=self.path, 2228 overwrite=overwrite, 2229 variables=variables, 2230 path=path, 2231 name=name, 2232 include_ctes=include_ctes, 2233 ) 2234 finally: 2235 if test_adapter: 2236 test_adapter.close()
Generate a unit test fixture for a given model.
Arguments:
- model: The model to test.
- input_queries: Mapping of model names to queries. Each model included in this mapping will be populated in the test based on the results of the corresponding query.
- overwrite: Whether to overwrite the existing test in case of a file path collision. When set to False, an error will be raised if there is such a collision.
- variables: Key-value pairs that will define variables needed by the model.
- path: The file path corresponding to the fixture, relative to the test directory. By default, the fixture will be created under the test directory and the file name will be inferred from the test's name.
- name: The name of the test. This is inferred from the model name by default.
- include_ctes: When true, CTE fixtures will also be generated.
2238 @python_api_analytics 2239 def test( 2240 self, 2241 match_patterns: t.Optional[t.List[str]] = None, 2242 tests: t.Optional[t.List[str]] = None, 2243 verbosity: Verbosity = Verbosity.DEFAULT, 2244 preserve_fixtures: bool = False, 2245 stream: t.Optional[t.TextIO] = None, 2246 ) -> ModelTextTestResult: 2247 """Discover and run model tests""" 2248 if verbosity >= Verbosity.VERBOSE: 2249 import pandas as pd 2250 2251 pd.set_option("display.max_columns", None) 2252 2253 test_meta = self.select_tests(tests=tests, patterns=match_patterns) 2254 2255 result = run_tests( 2256 model_test_metadata=test_meta, 2257 models=self._models, 2258 config=self.config, 2259 selected_gateway=self.selected_gateway, 2260 dialect=self.default_dialect, 2261 verbosity=verbosity, 2262 preserve_fixtures=preserve_fixtures, 2263 stream=stream, 2264 default_catalog=self.default_catalog, 2265 default_catalog_dialect=self.config.dialect or "", 2266 ) 2267 2268 self.console.log_test_results( 2269 result, 2270 self.test_connection_config._engine_adapter.DIALECT, 2271 ) 2272 2273 return result
Discover and run model tests
2275 @python_api_analytics 2276 def audit( 2277 self, 2278 start: TimeLike, 2279 end: TimeLike, 2280 *, 2281 models: t.Optional[t.Iterator[str]] = None, 2282 execution_time: t.Optional[TimeLike] = None, 2283 ) -> bool: 2284 """Audit models. 2285 2286 Args: 2287 start: The start of the interval to audit. 2288 end: The end of the interval to audit. 2289 models: The models to audit. All models will be audited if not specified. 2290 execution_time: The date/time time reference to use for execution time. Defaults to now. 2291 2292 Returns: 2293 False if any of the audits failed, True otherwise. 2294 """ 2295 2296 snapshots = ( 2297 [self.get_snapshot(model, raise_if_missing=True) for model in models] 2298 if models 2299 else self.snapshots.values() 2300 ) 2301 2302 num_audits = sum(len(snapshot.node.audits_with_args) for snapshot in snapshots) 2303 self.console.log_status_update(f"Found {num_audits} audit(s).") 2304 2305 errors = [] 2306 skipped_count = 0 2307 for snapshot in snapshots: 2308 for audit_result in self.snapshot_evaluator.audit( 2309 snapshot=snapshot, 2310 start=start, 2311 end=end, 2312 execution_time=execution_time, 2313 snapshots=self.snapshots, 2314 ): 2315 audit_id = f"{audit_result.audit.name}" 2316 if audit_result.model: 2317 audit_id += f" on model {audit_result.model.name}" 2318 2319 if audit_result.skipped: 2320 self.console.log_status_update(f"{audit_id} ⏸️ SKIPPED.") 2321 skipped_count += 1 2322 elif audit_result.count: 2323 errors.append(audit_result) 2324 self.console.log_status_update( 2325 f"{audit_id} ❌ [red]FAIL [{audit_result.count}][/red]." 2326 ) 2327 else: 2328 self.console.log_status_update(f"{audit_id} ✅ [green]PASS[/green].") 2329 2330 self.console.log_status_update( 2331 f"\nFinished with {len(errors)} audit error{'' if len(errors) == 1 else 's'} " 2332 f"and {skipped_count} audit{'' if skipped_count == 1 else 's'} skipped." 2333 ) 2334 for error in errors: 2335 self.console.log_status_update( 2336 f"\nFailure in audit {error.audit.name} ({error.audit._path})." 2337 ) 2338 self.console.log_status_update(f"Got {error.count} results, expected 0.") 2339 if error.query: 2340 self.console.show_sql( 2341 f"{error.query.sql(dialect=self.snapshot_evaluator.adapter.dialect)}" 2342 ) 2343 2344 self.console.log_status_update("Done.") 2345 return not errors
Audit models.
Arguments:
- start: The start of the interval to audit.
- end: The end of the interval to audit.
- models: The models to audit. All models will be audited if not specified.
- execution_time: The date/time time reference to use for execution time. Defaults to now.
Returns:
False if any of the audits failed, True otherwise.
2347 @python_api_analytics 2348 def rewrite(self, sql: str, dialect: str = "") -> exp.Expr: 2349 """Rewrite a sql expression with semantic references into an executable query. 2350 2351 https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/ 2352 2353 Args: 2354 sql: The sql string to rewrite. 2355 dialect: The dialect of the sql string, defaults to the project dialect. 2356 2357 Returns: 2358 A SQLGlot expression with semantic references expanded. 2359 """ 2360 return rewrite( 2361 sql, 2362 graph=ReferenceGraph(self.models.values()), 2363 metrics=self._metrics, 2364 dialect=dialect or self.default_dialect, 2365 )
Rewrite a sql expression with semantic references into an executable query.
https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/
Arguments:
- sql: The sql string to rewrite.
- dialect: The dialect of the sql string, defaults to the project dialect.
Returns:
A SQLGlot expression with semantic references expanded.
2367 @python_api_analytics 2368 def check_intervals( 2369 self, 2370 environment: t.Optional[str], 2371 no_signals: bool, 2372 select_models: t.Collection[str], 2373 start: t.Optional[TimeLike] = None, 2374 end: t.Optional[TimeLike] = None, 2375 ) -> t.Dict[Snapshot, SnapshotIntervals]: 2376 """Check intervals for a given environment. 2377 2378 Args: 2379 environment: The environment or prod if None. 2380 select_models: A list of model selection strings to show intervals for. 2381 start: The start of the intervals to check. 2382 end: The end of the intervals to check. 2383 """ 2384 2385 environment = environment or c.PROD 2386 env = self.state_reader.get_environment(environment) 2387 if not env: 2388 raise SQLMeshError(f"Environment '{environment}' was not found.") 2389 2390 snapshots = {k.name: v for k, v in self.state_sync.get_snapshots(env.snapshots).items()} 2391 2392 missing = { 2393 k.name: v 2394 for k, v in missing_intervals( 2395 snapshots.values(), start=start, end=end, execution_time=end 2396 ).items() 2397 } 2398 2399 if select_models: 2400 selected: t.Collection[str] = self._select_models_for_run( 2401 select_models, True, snapshots.values() 2402 ) 2403 else: 2404 selected = snapshots.keys() 2405 2406 results = {} 2407 execution_context = self.execution_context(snapshots=snapshots) 2408 2409 for fqn in selected: 2410 snapshot = snapshots[fqn] 2411 intervals = missing.get(fqn) or [] 2412 2413 results[snapshot] = SnapshotIntervals( 2414 snapshot.snapshot_id, 2415 intervals 2416 if no_signals 2417 else snapshot.check_ready_intervals(intervals, execution_context), 2418 ) 2419 2420 return results
Check intervals for a given environment.
Arguments:
- environment: The environment or prod if None.
- select_models: A list of model selection strings to show intervals for.
- start: The start of the intervals to check.
- end: The end of the intervals to check.
2422 @python_api_analytics 2423 def migrate(self) -> None: 2424 """Migrates SQLMesh to the current running version. 2425 2426 Please contact your SQLMesh administrator before doing this. 2427 """ 2428 self.notification_target_manager.notify(NotificationEvent.MIGRATION_START) 2429 self._load_materializations() 2430 try: 2431 self._new_state_sync().migrate( 2432 promoted_snapshots_only=self.config.migration.promoted_snapshots_only, 2433 ) 2434 except Exception as e: 2435 self.notification_target_manager.notify( 2436 NotificationEvent.MIGRATION_FAILURE, traceback.format_exc() 2437 ) 2438 raise e 2439 self.notification_target_manager.notify(NotificationEvent.MIGRATION_END)
Migrates SQLMesh to the current running version.
Please contact your SQLMesh administrator before doing this.
2441 @python_api_analytics 2442 def rollback(self) -> None: 2443 """Rolls back SQLMesh to the previous migration. 2444 2445 Please contact your SQLMesh administrator before doing this. This action cannot be undone. 2446 """ 2447 self._new_state_sync().rollback()
Rolls back SQLMesh to the previous migration.
Please contact your SQLMesh administrator before doing this. This action cannot be undone.
2449 @python_api_analytics 2450 def create_external_models(self, strict: bool = False) -> None: 2451 """Create a file to document the schema of external models. 2452 2453 The external models file contains all columns and types of external models, allowing for more 2454 robust lineage, validation, and optimizations. 2455 2456 Args: 2457 strict: If True, raise an error if the external model is missing in the database. 2458 """ 2459 if not self._models: 2460 self.load(update_schemas=False) 2461 2462 for path, config in self.configs.items(): 2463 deprecated_yaml = path / c.EXTERNAL_MODELS_DEPRECATED_YAML 2464 2465 external_models_yaml = ( 2466 path / c.EXTERNAL_MODELS_YAML if not deprecated_yaml.exists() else deprecated_yaml 2467 ) 2468 2469 external_models_gateway: t.Optional[str] = self.gateway or self.config.default_gateway 2470 if not external_models_gateway: 2471 # can happen if there was no --gateway defined and the default_gateway is '' 2472 # which means that the single gateway syntax is being used which means there is 2473 # no named gateway which means we should not stamp `gateway:` on the external models 2474 external_models_gateway = None 2475 2476 create_external_models_file( 2477 path=external_models_yaml, 2478 models=UniqueKeyDict( 2479 "models", 2480 { 2481 fqn: model 2482 for fqn, model in self._models.items() 2483 if self.config_for_node(model) is config 2484 }, 2485 ), 2486 adapter=self.engine_adapter, 2487 state_reader=self.state_reader, 2488 dialect=config.model_defaults.dialect, 2489 gateway=external_models_gateway, 2490 max_workers=self.concurrent_tasks, 2491 strict=strict, 2492 )
Create a file to document the schema of external models.
The external models file contains all columns and types of external models, allowing for more robust lineage, validation, and optimizations.
Arguments:
- strict: If True, raise an error if the external model is missing in the database.
2494 @python_api_analytics 2495 def print_info( 2496 self, skip_connection: bool = False, verbosity: Verbosity = Verbosity.DEFAULT 2497 ) -> None: 2498 """Prints information about connections, models, macros, etc. to the console.""" 2499 self.console.log_status_update(f"Models: {len(self.models)}") 2500 self.console.log_status_update(f"Macros: {len(self._macros) - len(macro.get_registry())}") 2501 2502 if skip_connection: 2503 return 2504 2505 if verbosity >= Verbosity.VERBOSE: 2506 self.console.log_status_update("") 2507 print_config(self.config.get_connection(self.gateway), self.console, "Connection") 2508 print_config( 2509 self.config.get_test_connection(self.gateway), self.console, "Test Connection" 2510 ) 2511 print_config( 2512 self.config.get_state_connection(self.gateway), self.console, "State Connection" 2513 ) 2514 2515 self._try_connection("data warehouse", self.engine_adapter.ping) 2516 state_connection = self.config.get_state_connection(self.gateway) 2517 if state_connection: 2518 self._try_connection("state backend", state_connection.connection_validator())
Prints information about connections, models, macros, etc. to the console.
2520 @python_api_analytics 2521 def print_environment_names(self) -> None: 2522 """Prints all environment names along with expiry datetime.""" 2523 result = self._new_state_sync().get_environments_summary() 2524 if not result: 2525 raise SQLMeshError( 2526 "This project has no environments. Create an environment using the `sqlmesh plan` command." 2527 ) 2528 self.console.print_environments(result)
Prints all environment names along with expiry datetime.
2530 def close(self) -> None: 2531 """Releases all resources allocated by this context.""" 2532 if self._snapshot_evaluator: 2533 self._snapshot_evaluator.close() 2534 2535 if self._state_sync: 2536 self._state_sync.close()
Releases all resources allocated by this context.
2591 @python_api_analytics 2592 def table_name( 2593 self, model_name: str, environment: t.Optional[str] = None, prod: bool = False 2594 ) -> str: 2595 """Returns the name of the pysical table for the given model name in the target environment. 2596 2597 Args: 2598 model_name: The name of the model. 2599 environment: The environment to source the model version from. 2600 prod: If True, return the name of the physical table that will be used in production for the model version 2601 promoted in the target environment. 2602 2603 Returns: 2604 The name of the physical table. 2605 """ 2606 environment = environment or self.config.default_target_environment 2607 fqn = self._node_or_snapshot_to_fqn(model_name) 2608 target_env = self.state_reader.get_environment(environment) 2609 if not target_env: 2610 raise SQLMeshError(f"Environment '{environment}' was not found.") 2611 2612 snapshot_info = None 2613 for s in target_env.snapshots: 2614 if s.name == fqn: 2615 snapshot_info = s 2616 break 2617 if not snapshot_info: 2618 raise SQLMeshError( 2619 f"Model '{model_name}' was not found in environment '{environment}'." 2620 ) 2621 2622 if target_env.name == c.PROD or prod: 2623 return snapshot_info.table_name() 2624 2625 snapshots = self.state_reader.get_snapshots(target_env.snapshots) 2626 deployability_index = DeployabilityIndex.create(snapshots) 2627 2628 return snapshot_info.table_name( 2629 is_deployable=deployability_index.is_deployable(snapshot_info.snapshot_id) 2630 )
Returns the name of the pysical table for the given model name in the target environment.
Arguments:
- model_name: The name of the model.
- environment: The environment to source the model version from.
- prod: If True, return the name of the physical table that will be used in production for the model version promoted in the target environment.
Returns:
The name of the physical table.
2632 def clear_caches(self) -> None: 2633 paths_to_remove = [path / c.CACHE for path in self.configs] 2634 paths_to_remove.append(self.cache_dir) 2635 2636 if IS_WINDOWS: 2637 paths_to_remove = [fix_windows_path(path) for path in paths_to_remove] 2638 2639 for path in paths_to_remove: 2640 if path.exists(): 2641 rmtree(path) 2642 2643 if isinstance(self._state_sync, CachingStateSync): 2644 self._state_sync.clear_cache()
2646 def export_state( 2647 self, 2648 output_file: Path, 2649 environment_names: t.Optional[t.List[str]] = None, 2650 local_only: bool = False, 2651 confirm: bool = True, 2652 ) -> None: 2653 from sqlmesh.core.state_sync.export_import import export_state 2654 2655 # trigger a connection to the StateSync so we can fail early if there is a problem 2656 # note we still need to do this even if we are doing a local export so we know what 'versions' to write 2657 self.state_sync.get_versions(validate=True) 2658 2659 local_snapshots = self.snapshots if local_only else None 2660 2661 if self.console.start_state_export( 2662 output_file=output_file, 2663 gateway=self.selected_gateway, 2664 state_connection_config=self._state_connection_config, 2665 environment_names=environment_names, 2666 local_only=local_only, 2667 confirm=confirm, 2668 ): 2669 try: 2670 export_state( 2671 state_sync=self.state_sync, 2672 output_file=output_file, 2673 local_snapshots=local_snapshots, 2674 environment_names=environment_names, 2675 console=self.console, 2676 ) 2677 self.console.stop_state_export(success=True, output_file=output_file) 2678 except: 2679 self.console.stop_state_export(success=False, output_file=output_file) 2680 raise
2682 def import_state(self, input_file: Path, clear: bool = False, confirm: bool = True) -> None: 2683 from sqlmesh.core.state_sync.export_import import import_state 2684 2685 if self.console.start_state_import( 2686 input_file=input_file, 2687 gateway=self.selected_gateway, 2688 state_connection_config=self._state_connection_config, 2689 clear=clear, 2690 confirm=confirm, 2691 ): 2692 try: 2693 import_state( 2694 state_sync=self.state_sync, 2695 input_file=input_file, 2696 clear=clear, 2697 console=self.console, 2698 ) 2699 self.console.stop_state_import(success=True, input_file=input_file) 2700 except: 2701 self.console.stop_state_import(success=False, input_file=input_file) 2702 raise
2742 @cached_property 2743 def cache_dir(self) -> Path: 2744 if self.config.cache_dir: 2745 cache_path = Path(self.config.cache_dir) 2746 if cache_path.is_absolute(): 2747 return cache_path 2748 return self.path / cache_path 2749 2750 # Default to .cache directory in the project path 2751 return self.path / c.CACHE
2753 @cached_property 2754 def engine_adapters(self) -> t.Dict[str, EngineAdapter]: 2755 """Returns all the engine adapters for the gateways defined in the configurations.""" 2756 adapters: t.Dict[str, EngineAdapter] = {self.selected_gateway: self.engine_adapter} 2757 for config in self.configs.values(): 2758 for gateway_name in config.gateways: 2759 if gateway_name not in adapters: 2760 connection = config.get_connection(gateway_name) 2761 adapter = connection.create_engine_adapter( 2762 concurrent_tasks=self.concurrent_tasks, 2763 ) 2764 adapters[gateway_name] = adapter 2765 return adapters
Returns all the engine adapters for the gateways defined in the configurations.
2767 @cached_property 2768 def default_catalog_per_gateway(self) -> t.Dict[str, str]: 2769 """Returns the default catalogs for each engine adapter.""" 2770 return self._scheduler.get_default_catalog_per_gateway(self)
Returns the default catalogs for each engine adapter.
2790 @cached_property 2791 def environment_catalog_mapping(self) -> RegexKeyDict: 2792 engine_adapter = None 2793 try: 2794 engine_adapter = self.engine_adapter 2795 except Exception: 2796 pass 2797 2798 if ( 2799 self.config.environment_catalog_mapping 2800 and engine_adapter 2801 and not self.engine_adapter.catalog_support.is_multi_catalog_supported 2802 ): 2803 raise SQLMeshError( 2804 "Environment catalog mapping is only supported for engine adapters that support multiple catalogs" 2805 ) 2806 return self.config.environment_catalog_mapping
3203 def lint_models( 3204 self, 3205 models: t.Optional[t.Iterable[t.Union[str, Model]]] = None, 3206 raise_on_error: bool = True, 3207 ) -> t.List[AnnotatedRuleViolation]: 3208 found_error = False 3209 3210 model_list = ( 3211 list(self.get_model(model, raise_if_missing=True) for model in models) 3212 if models 3213 else self.models.values() 3214 ) 3215 all_violations = [] 3216 for model in model_list: 3217 # Linter may be `None` if the context is not loaded yet 3218 if linter := self._linters.get(model.project): 3219 lint_violation, violations = ( 3220 linter.lint_model(model, self, console=self.console) or found_error 3221 ) 3222 if lint_violation: 3223 found_error = True 3224 all_violations.extend(violations) 3225 3226 if raise_on_error and found_error: 3227 raise LinterError( 3228 "Linter detected errors in the code. Please fix them before proceeding." 3229 ) 3230 3231 return all_violations
3233 def select_tests( 3234 self, 3235 tests: t.Optional[t.List[str]] = None, 3236 patterns: t.Optional[t.List[str]] = None, 3237 ) -> t.List[ModelTestMetadata]: 3238 """Filter pre-loaded test metadata based on tests and patterns.""" 3239 3240 test_meta = self._model_test_metadata 3241 3242 if tests: 3243 filtered_tests = [] 3244 for test in tests: 3245 if "::" in test: 3246 if test in self._model_test_metadata_fully_qualified_name_index: 3247 filtered_tests.append( 3248 self._model_test_metadata_fully_qualified_name_index[test] 3249 ) 3250 else: 3251 test_path = Path(test) 3252 if test_path in self._model_test_metadata_path_index: 3253 filtered_tests.extend(self._model_test_metadata_path_index[test_path]) 3254 3255 test_meta = filtered_tests 3256 3257 if patterns: 3258 test_meta = filter_tests_by_patterns(test_meta, patterns) 3259 3260 return test_meta
Filter pre-loaded test metadata based on tests and patterns.
Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.
Arguments:
- notification_targets: The notification target to use. Defaults to what is defined in config.
- paths: The directories containing SQLMesh files.
- config: A Config object or the name of a Config object in config.py.
- connection: The name of the connection. If not specified the first connection as it appears in configuration will be used.
- test_connection: The name of the connection to use for tests. If not specified the first connection as it appears in configuration will be used.
- concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
- load: Whether or not to automatically load all models and macros (default True).
- console: The rich instance used for printing out CLI command results.
- users: A list of users to make known to SQLMesh.
The type of config object to use (default: Config).
Inherited Members
- GenericContext
- GenericContext
- PLAN_BUILDER_TYPE
- configs
- dag
- gateway
- environment_ttl
- pinned_environments
- auto_categorize_changes
- selected_gateway
- console
- notification_targets
- users
- default_dialect
- engine_adapter
- snapshot_evaluator
- execution_context
- upsert_model
- scheduler
- create_scheduler
- state_sync
- state_reader
- refresh
- load
- run
- run_janitor
- destroy
- get_model
- get_snapshot
- config_for_path
- config_for_node
- models
- metrics
- standalone_audits
- models_with_tests
- snapshots
- requirements
- default_catalog
- render
- evaluate
- format
- plan
- plan_builder
- apply
- invalidate_environment
- diff
- table_diff
- get_dag
- render_dag
- create_test
- test
- audit
- rewrite
- check_intervals
- migrate
- rollback
- create_external_models
- print_info
- print_environment_names
- close
- table_name
- clear_caches
- export_state
- import_state
- cache_dir
- engine_adapters
- default_catalog_per_gateway
- concurrent_tasks
- connection_config
- test_connection_config
- environment_catalog_mapping
- lint_models
- select_tests