Context
A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and
load your project's models, macros, and audits. Afterwards, you can use the context to create and apply
plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks.
For more information regarding what a context can do, see Context.
Examples:
Creating and applying a plan against the staging environment.
from sqlmesh.core.context import Context
context = Context(paths="example", config="local_config")
plan = context.plan("staging")
context.apply(plan)
Running audits on your data.
from sqlmesh.core.context import Context
context = Context(paths="example", config="local_config")
context.audit("yesterday", "now")
Running tests on your models.
from sqlmesh.core.context import Context
context = Context(paths="example")
context.test()
1""" 2# Context 3 4A SQLMesh context encapsulates a SQLMesh environment. When you create a new context, it will discover and 5load your project's models, macros, and audits. Afterwards, you can use the context to create and apply 6plans, visualize your model's lineage, run your audits and model tests, and perform various other tasks. 7For more information regarding what a context can do, see `sqlmesh.core.context.Context`. 8 9# Examples: 10 11Creating and applying a plan against the staging environment. 12```python 13from sqlmesh.core.context import Context 14context = Context(paths="example", config="local_config") 15plan = context.plan("staging") 16context.apply(plan) 17``` 18 19Running audits on your data. 20```python 21from sqlmesh.core.context import Context 22context = Context(paths="example", config="local_config") 23context.audit("yesterday", "now") 24``` 25 26Running tests on your models. 27```python 28from sqlmesh.core.context import Context 29context = Context(paths="example") 30context.test() 31``` 32""" 33 34from __future__ import annotations 35 36import abc 37import collections 38import logging 39import sys 40import time 41import traceback 42import typing as t 43from functools import cached_property 44from io import StringIO 45from itertools import chain 46from pathlib import Path 47from shutil import rmtree 48from types import MappingProxyType 49from datetime import datetime 50 51from sqlglot import Dialect, exp 52from sqlglot.helper import first 53from sqlglot.lineage import GraphHTML 54 55from sqlmesh.core import analytics 56from sqlmesh.core import constants as c 57from sqlmesh.core.analytics import python_api_analytics 58from sqlmesh.core.audit import Audit, ModelAudit, StandaloneAudit 59from sqlmesh.core.config import ( 60 CategorizerConfig, 61 Config, 62 load_configs, 63) 64from sqlmesh.core.config.connection import ConnectionConfig 65from sqlmesh.core.config.loader import C 66from sqlmesh.core.config.root import RegexKeyDict 67from 
sqlmesh.core.console import get_console 68from sqlmesh.core.context_diff import ContextDiff 69from sqlmesh.core.dialect import ( 70 format_model_expressions, 71 is_meta_expression, 72 normalize_model_name, 73 pandas_to_sql, 74 parse, 75 parse_one, 76) 77from sqlmesh.core.engine_adapter import EngineAdapter 78from sqlmesh.core.environment import Environment, EnvironmentNamingInfo, EnvironmentStatements 79from sqlmesh.core.loader import Loader 80from sqlmesh.core.linter.definition import AnnotatedRuleViolation, Linter 81from sqlmesh.core.linter.rules import BUILTIN_RULES 82from sqlmesh.core.macros import ExecutableOrMacro, macro 83from sqlmesh.core.metric import Metric, rewrite 84from sqlmesh.core.model import Model, update_model_schemas 85from sqlmesh.core.config.model import ModelDefaultsConfig 86from sqlmesh.core.notification_target import ( 87 NotificationEvent, 88 NotificationTarget, 89 NotificationTargetManager, 90) 91from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals, PlanExplainer 92from sqlmesh.core.plan.definition import UserProvidedFlags 93from sqlmesh.core.reference import ReferenceGraph 94from sqlmesh.core.scheduler import Scheduler, CompletionStatus 95from sqlmesh.core.schema_loader import create_external_models_file 96from sqlmesh.core.selector import Selector, NativeSelector 97from sqlmesh.core.snapshot import ( 98 DeployabilityIndex, 99 Snapshot, 100 SnapshotEvaluator, 101 SnapshotFingerprint, 102 missing_intervals, 103 to_table_mapping, 104) 105from sqlmesh.core.snapshot.definition import get_next_model_interval_start 106from sqlmesh.core.state_sync import ( 107 CachingStateSync, 108 StateReader, 109 StateSync, 110) 111from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots 112from sqlmesh.core.table_diff import TableDiff 113from sqlmesh.core.test import ( 114 ModelTextTestResult, 115 ModelTestMetadata, 116 generate_test, 117 run_tests, 118 filter_tests_by_patterns, 119) 120from sqlmesh.core.user import User 
121from sqlmesh.utils import UniqueKeyDict, Verbosity 122from sqlmesh.utils.concurrency import concurrent_apply_to_values 123from sqlmesh.utils.dag import DAG 124from sqlmesh.utils.date import ( 125 TimeLike, 126 to_timestamp, 127 format_tz_datetime, 128 now_timestamp, 129 now, 130 to_datetime, 131 make_exclusive, 132) 133from sqlmesh.utils.errors import ( 134 CircuitBreakerError, 135 ConfigError, 136 PlanError, 137 SQLMeshError, 138 UncategorizedPlanError, 139 LinterError, 140) 141from sqlmesh.utils.config import print_config 142from sqlmesh.utils.jinja import JinjaMacroRegistry 143from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path 144 145if t.TYPE_CHECKING: 146 import pandas as pd 147 from typing_extensions import Literal 148 149 from sqlmesh.core.engine_adapter._typing import ( 150 BigframeSession, 151 DF, 152 PySparkDataFrame, 153 PySparkSession, 154 SnowparkSession, 155 ) 156 from sqlmesh.core.snapshot import Node 157 158 from sqlmesh.core.snapshot.definition import Intervals 159 160 ModelOrSnapshot = t.Union[str, Model, Snapshot] 161 NodeOrSnapshot = t.Union[str, Model, StandaloneAudit, Snapshot] 162 163logger = logging.getLogger(__name__) 164 165 166class BaseContext(abc.ABC): 167 """The base context which defines methods to execute a model.""" 168 169 @property 170 @abc.abstractmethod 171 def default_dialect(self) -> t.Optional[str]: 172 """Returns the default dialect.""" 173 174 @property 175 @abc.abstractmethod 176 def _model_tables(self) -> t.Dict[str, str]: 177 """Returns a mapping of model names to tables.""" 178 179 @property 180 @abc.abstractmethod 181 def engine_adapter(self) -> EngineAdapter: 182 """Returns an engine adapter.""" 183 184 @property 185 def spark(self) -> t.Optional[PySparkSession]: 186 """Returns the spark session if it exists.""" 187 return self.engine_adapter.spark 188 189 @property 190 def snowpark(self) -> t.Optional[SnowparkSession]: 191 """Returns the snowpark session if it exists.""" 192 return 
self.engine_adapter.snowpark 193 194 @property 195 def bigframe(self) -> t.Optional[BigframeSession]: 196 """Returns the bigframe session if it exists.""" 197 return self.engine_adapter.bigframe 198 199 @property 200 def default_catalog(self) -> t.Optional[str]: 201 raise NotImplementedError 202 203 def table(self, model_name: str) -> str: 204 get_console().log_warning( 205 "The SQLMesh context's `table` method is deprecated and will be removed " 206 "in a future release. Please use the `resolve_table` method instead." 207 ) 208 return self.resolve_table(model_name) 209 210 def resolve_table(self, model_name: str) -> str: 211 """Gets the physical table name for a given model. 212 213 Args: 214 model_name: The model name. 215 216 Returns: 217 The physical table name. 218 """ 219 model_name = normalize_model_name(model_name, self.default_catalog, self.default_dialect) 220 221 if model_name not in self._model_tables: 222 model_name_list = "\n".join(list(self._model_tables)) 223 logger.debug( 224 f"'{model_name}' not found in model to table mapping. Available model names: \n{model_name_list}" 225 ) 226 raise SQLMeshError( 227 f"Unable to find a table mapping for model '{model_name}'. Has it been spelled correctly?" 228 ) 229 230 # We generate SQL for the default dialect because the table name may be used in a 231 # fetchdf call and so the quotes need to be correct (eg. backticks for bigquery) 232 return parse_one(self._model_tables[model_name]).sql( 233 dialect=self.default_dialect, identify=True 234 ) 235 236 def fetchdf( 237 self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False 238 ) -> pd.DataFrame: 239 """Fetches a dataframe given a sql string or sqlglot expression. 240 241 Args: 242 query: SQL string or sqlglot expression. 243 quote_identifiers: Whether to quote all identifiers in the query. 244 245 Returns: 246 The default dataframe is Pandas, but for Spark a PySpark dataframe is returned. 
class ExecutionContext(BaseContext):
    """The minimal context needed to execute a model.

    Args:
        engine_adapter: The engine adapter to execute queries against.
        snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
        deployability_index: Determines snapshots that are deployable in the context of this evaluation.
        default_dialect: The dialect returned by `default_dialect`.
        default_catalog: The catalog returned by `default_catalog`.
        is_restatement: The value returned by `is_restatement`.
        parent_intervals: The value returned by `parent_intervals`.
        variables: The values served by `var`. Defaults to an empty mapping.
        blueprint_variables: The values served by `blueprint_var`. Defaults to an empty mapping.
    """

    def __init__(
        self,
        engine_adapter: EngineAdapter,
        snapshots: t.Dict[str, Snapshot],
        deployability_index: t.Optional[DeployabilityIndex] = None,
        default_dialect: t.Optional[str] = None,
        default_catalog: t.Optional[str] = None,
        is_restatement: t.Optional[bool] = None,
        parent_intervals: t.Optional[Intervals] = None,
        variables: t.Optional[t.Dict[str, t.Any]] = None,
        blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    ):
        self.snapshots = snapshots
        self.deployability_index = deployability_index
        self._engine_adapter = engine_adapter
        self._default_catalog = default_catalog
        self._default_dialect = default_dialect
        self._variables = variables or {}
        self._blueprint_variables = blueprint_variables or {}
        self._is_restatement = is_restatement
        self._parent_intervals = parent_intervals

    @property
    def default_dialect(self) -> t.Optional[str]:
        """Returns the default dialect."""
        return self._default_dialect

    @property
    def engine_adapter(self) -> EngineAdapter:
        """Returns an engine adapter."""
        return self._engine_adapter

    @cached_property
    def _model_tables(self) -> t.Dict[str, str]:
        """Returns a mapping of model names to tables."""
        return to_table_mapping(self.snapshots.values(), self.deployability_index)

    @property
    def default_catalog(self) -> t.Optional[str]:
        """Returns the default catalog."""
        return self._default_catalog

    @property
    def gateway(self) -> t.Optional[str]:
        """Returns the gateway name."""
        return self.var(c.GATEWAY)

    @property
    def is_restatement(self) -> t.Optional[bool]:
        """Returns the restatement flag this context was created with."""
        return self._is_restatement

    @property
    def parent_intervals(self) -> t.Optional[Intervals]:
        """Returns the parent intervals this context was created with."""
        return self._parent_intervals

    def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
        """Returns a variable value.

        The name is lowercased before the lookup; `default` is returned when it is absent.
        """
        return self._variables.get(var_name.lower(), default)

    def blueprint_var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
        """Returns a blueprint variable value.

        The name is lowercased before the lookup; `default` is returned when it is absent.
        """
        return self._blueprint_variables.get(var_name.lower(), default)

    def with_variables(
        self,
        variables: t.Dict[str, t.Any],
        blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    ) -> ExecutionContext:
        """Returns a new ExecutionContext that uses the given variables.

        Everything else (adapter, snapshots, deployability index, dialect, catalog,
        restatement flag and parent intervals) is carried over from this context.
        The given mappings replace this context's variables; they are not merged.

        Args:
            variables: The variables for the new context.
            blueprint_variables: The blueprint variables for the new context.

        Returns:
            The new execution context.
        """
        return ExecutionContext(
            self._engine_adapter,
            self.snapshots,
            self.deployability_index,
            self._default_dialect,
            self._default_catalog,
            self._is_restatement,
            # Previously omitted, which silently reset parent_intervals to None.
            parent_intervals=self._parent_intervals,
            variables=variables,
            blueprint_variables=blueprint_variables,
        )
def __init__(
    self,
    notification_targets: t.Optional[t.List[NotificationTarget]] = None,
    state_sync: t.Optional[StateSync] = None,
    paths: t.Union[str | Path, t.Iterable[str | Path]] = "",
    config: t.Optional[t.Union[C, str, t.Dict[Path, C]]] = None,
    gateway: t.Optional[str] = None,
    concurrent_tasks: t.Optional[int] = None,
    loader: t.Optional[t.Type[Loader]] = None,
    load: bool = True,
    users: t.Optional[t.List[User]] = None,
    config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None,
    selector: t.Optional[t.Type[Selector]] = None,
):
    """Builds the context from configuration and optionally loads the project.

    Args:
        notification_targets: Targets placed ahead of the ones defined in config.
        state_sync: A state sync instance kept as `_provided_state_sync`.
        paths: Passed to `load_configs` when `config` is not already a dict.
        config: Either a ready mapping of project path to config, which is used as is,
            or a value that `load_configs` resolves together with `paths`.
        gateway: The gateway name. When omitted, the config's default gateway name is
            used for `selected_gateway`.
        concurrent_tasks: Stored as `_concurrent_tasks`.
        loader: A loader class used for every project instead of each config's own loader.
        load: Whether to call `load()` at the end of initialization.
        users: Users combined with the ones defined in config; one user is kept per username.
        config_loader_kwargs: Extra keyword arguments forwarded to `load_configs`.
        selector: The selector class to use. Defaults to `NativeSelector`.
    """
    self.configs = (
        config
        if isinstance(config, dict)
        else load_configs(config, self.CONFIG_TYPE, paths, **(config_loader_kwargs or {}))
    )
    self._projects = {config.project for config in self.configs.values()}
    self.dag: DAG[str] = DAG()
    self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
    self._audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("audits")
    self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict(
        "standaloneaudits"
    )
    self._model_test_metadata: t.List[ModelTestMetadata] = []
    self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {}
    self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {}
    self._models_with_tests: t.Set[str] = set()

    self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
    self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics")
    self._jinja_macros = JinjaMacroRegistry()
    self._requirements: t.Dict[str, str] = {}
    self._environment_statements: t.List[EnvironmentStatements] = []
    self._excluded_requirements: t.Set[str] = set()
    self._engine_adapter: t.Optional[EngineAdapter] = None
    self._linters: t.Dict[str, Linter] = {}
    self._loaded: bool = False
    self._selector_cls = selector or NativeSelector

    # The first (path, config) pair acts as the primary project for this context.
    self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items())))

    self._all_dialects: t.Set[str] = {self.config.dialect or ""}

    if self.config.disable_anonymized_analytics:
        analytics.disable_analytics()

    self.gateway = gateway
    self._scheduler = self.config.get_scheduler(self.gateway)
    self.environment_ttl = self.config.environment_ttl
    self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments)
    self.auto_categorize_changes = self.config.plan.auto_categorize_changes
    self.selected_gateway = (gateway or self.config.default_gateway_name).lower()

    gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults
    if gw_model_defaults:
        # Merge global model defaults with the selected gateway's, if it's overridden.
        # Gateway values win because they are unpacked last.
        global_defaults = self.config.model_defaults.model_dump(exclude_unset=True)
        gateway_defaults = gw_model_defaults.model_dump(exclude_unset=True)

        self.config.model_defaults = ModelDefaultsConfig(
            **{**global_defaults, **gateway_defaults}
        )

    # This allows overriding the default dialect's normalization strategy, so for example
    # one can do `dialect="duckdb,normalization_strategy=lowercase"` and this will be
    # applied to the DuckDB dialect globally
    if "normalization_strategy" in str(self.config.dialect):
        dialect = Dialect.get_or_raise(self.config.dialect)
        # Note: this assigns on the dialect class, not on the instance.
        type(dialect).NORMALIZATION_STRATEGY = dialect.normalization_strategy

    # One loader per project; an explicit `loader` argument takes precedence over config.
    self._loaders = [
        (loader or config.loader)(self, path, **config.loader_kwargs)
        for path, config in self.configs.items()
    ]

    self._concurrent_tasks = concurrent_tasks
    # Fall back to the regular connection config when no state connection is returned.
    self._state_connection_config = (
        self.config.get_state_connection(self.gateway) or self.connection_config
    )

    self._snapshot_evaluator: t.Optional[SnapshotEvaluator] = None

    self.console = get_console()
    setattr(self.console, "dialect", self.config.dialect)

    self._provided_state_sync: t.Optional[StateSync] = state_sync
    self._state_sync: t.Optional[StateSync] = None

    # Should we dedupe notification_targets? If so how?
    self.notification_targets = (notification_targets or []) + self.config.notification_targets
    self.users = (users or []) + self.config.users
    # Keep a single user per username; a later entry replaces an earlier one.
    self.users = list({user.username: user for user in self.users}.values())
    self._register_notification_targets()

    if load:
        self.load()
def scheduler(
    self,
    environment: t.Optional[str] = None,
    snapshot_evaluator: t.Optional[SnapshotEvaluator] = None,
) -> Scheduler:
    """Builds the built-in scheduler for an environment or for the local state.

    Args:
        environment: The target environment to source model snapshots from, or None
            if snapshots should be sourced from the currently loaded local state.
        snapshot_evaluator: The evaluator to hand to the scheduler. The context's own
            evaluator is used when this is omitted.

    Returns:
        The built-in scheduler instance.

    Raises:
        ConfigError: If the environment is not found or there are no snapshots to schedule.
    """
    candidates: t.Iterable[Snapshot]
    if environment is None:
        # No environment given: schedule whatever is currently loaded locally.
        candidates = self.snapshots.values()
    else:
        env_record = self.state_sync.get_environment(environment)
        if env_record is None:
            raise ConfigError(f"Environment '{environment}' was not found.")
        candidates = self.state_sync.get_snapshots(env_record.snapshots).values()

    if not candidates:
        raise ConfigError("No models were found")

    evaluator = snapshot_evaluator or self.snapshot_evaluator
    return self.create_scheduler(candidates, evaluator)
591 592 Args: 593 snapshots: The snapshots to schedule. 594 595 Returns: 596 The built-in scheduler instance. 597 """ 598 return Scheduler( 599 snapshots, 600 snapshot_evaluator, 601 self.state_sync, 602 default_catalog=self.default_catalog, 603 max_workers=self.concurrent_tasks, 604 console=self.console, 605 notification_target_manager=self.notification_target_manager, 606 ) 607 608 @property 609 def state_sync(self) -> StateSync: 610 if not self._state_sync: 611 self._state_sync = self._new_state_sync() 612 613 if self._state_sync.get_versions(validate=False).schema_version == 0: 614 self.console.log_status_update("Initializing new project state...") 615 self._state_sync.migrate() 616 self._state_sync.get_versions() 617 self._state_sync = CachingStateSync(self._state_sync) # type: ignore 618 return self._state_sync 619 620 @property 621 def state_reader(self) -> StateReader: 622 return self.state_sync 623 624 def refresh(self) -> None: 625 """Refresh all models that have been updated.""" 626 if any(loader.reload_needed() for loader in self._loaders): 627 self.load() 628 629 def load(self, update_schemas: bool = True) -> GenericContext[C]: 630 """Load all files in the context's path.""" 631 load_start_ts = time.perf_counter() 632 633 loaded_projects = [loader.load() for loader in self._loaders] 634 635 self.dag = DAG() 636 self._standalone_audits.clear() 637 self._audits.clear() 638 self._macros.clear() 639 self._models.clear() 640 self._metrics.clear() 641 self._requirements.clear() 642 self._excluded_requirements.clear() 643 self._linters.clear() 644 self._environment_statements = [] 645 self._model_test_metadata.clear() 646 self._model_test_metadata_path_index.clear() 647 self._model_test_metadata_fully_qualified_name_index.clear() 648 self._models_with_tests.clear() 649 650 for loader, project in zip(self._loaders, loaded_projects): 651 self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) 652 self._macros.update(project.macros) 653 
self._models.update(project.models) 654 self._metrics.update(project.metrics) 655 self._audits.update(project.audits) 656 self._standalone_audits.update(project.standalone_audits) 657 self._requirements.update(project.requirements) 658 self._excluded_requirements.update(project.excluded_requirements) 659 self._environment_statements.extend(project.environment_statements) 660 661 self._model_test_metadata.extend(project.model_test_metadata) 662 for metadata in project.model_test_metadata: 663 if metadata.path not in self._model_test_metadata_path_index: 664 self._model_test_metadata_path_index[metadata.path] = [] 665 self._model_test_metadata_path_index[metadata.path].append(metadata) 666 self._model_test_metadata_fully_qualified_name_index[ 667 metadata.fully_qualified_test_name 668 ] = metadata 669 self._models_with_tests.add(metadata.model_name) 670 671 config = loader.config 672 self._linters[config.project] = Linter.from_rules( 673 BUILTIN_RULES.union(project.user_rules), config.linter 674 ) 675 676 # Load environment statements from state for projects not in current load 677 if any(self._projects): 678 prod = self.state_reader.get_environment(c.PROD) 679 if prod: 680 existing_statements = self.state_reader.get_environment_statements(c.PROD) 681 for stmt in existing_statements: 682 if stmt.project and stmt.project not in self._projects: 683 self._environment_statements.append(stmt) 684 685 uncached = set() 686 687 if any(self._projects): 688 prod = self.state_reader.get_environment(c.PROD) 689 690 if prod: 691 for snapshot in self.state_reader.get_snapshots(prod.snapshots).values(): 692 if snapshot.node.project in self._projects: 693 uncached.add(snapshot.name) 694 else: 695 store = self._standalone_audits if snapshot.is_audit else self._models 696 store[snapshot.name] = snapshot.node # type: ignore 697 698 for model in self._models.values(): 699 self.dag.add(model.fqn, model.depends_on) 700 701 if update_schemas: 702 for fqn in self.dag: 703 model = 
self._models.get(fqn) # type: ignore 704 705 if not model or fqn in uncached: 706 continue 707 708 # make a copy of remote models that depend on local models or in the downstream chain 709 # without this, a SELECT * FROM local will not propogate properly because the downstream 710 # model will get mutated (schema changes) but the object is the same as the remote cache 711 if any(dep in uncached for dep in model.depends_on): 712 uncached.add(fqn) 713 self._models.update({fqn: model.copy(update={"mapping_schema": {}})}) 714 continue 715 716 update_model_schemas( 717 self.dag, 718 models=self._models, 719 cache_dir=self.cache_dir, 720 ) 721 722 models = self.models.values() 723 for model in models: 724 # The model definition can be validated correctly only after the schema is set. 725 model.validate_definition() 726 727 duplicates = set(self._models) & set(self._standalone_audits) 728 if duplicates: 729 raise ConfigError( 730 f"Models and Standalone audits cannot have the same name: {duplicates}" 731 ) 732 733 self._all_dialects = {m.dialect for m in self._models.values() if m.dialect} | { 734 self.default_dialect or "" 735 } 736 737 analytics.collector.on_project_loaded( 738 project_type=self._project_type, 739 models_count=len(self._models), 740 audits_count=len(self._audits), 741 standalone_audits_count=len(self._standalone_audits), 742 macros_count=len(self._macros), 743 jinja_macros_count=len(self._jinja_macros.root_macros), 744 load_time_sec=time.perf_counter() - load_start_ts, 745 state_sync_fingerprint=self._scheduler.state_sync_fingerprint(self), 746 project_name=self.config.project, 747 ) 748 749 self._loaded = True 750 return self 751 752 @python_api_analytics 753 def run( 754 self, 755 environment: t.Optional[str] = None, 756 *, 757 start: t.Optional[TimeLike] = None, 758 end: t.Optional[TimeLike] = None, 759 execution_time: t.Optional[TimeLike] = None, 760 skip_janitor: bool = False, 761 ignore_cron: bool = False, 762 select_models: 
t.Optional[t.Collection[str]] = None, 763 exit_on_env_update: t.Optional[int] = None, 764 no_auto_upstream: bool = False, 765 ) -> CompletionStatus: 766 """Run the entire dag through the scheduler. 767 768 Args: 769 environment: The target environment to source model snapshots from and virtually update. Default: prod. 770 start: The start of the interval to render. 771 end: The end of the interval to render. 772 execution_time: The date/time time reference to use for execution time. Defaults to now. 773 skip_janitor: Whether to skip the janitor task. 774 ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals. 775 select_models: A list of model selection expressions to filter models that should run. Note that 776 upstream dependencies of selected models will also be evaluated. 777 exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update 778 to the target environment. 779 no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`. 780 781 Returns: 782 True if the run was successful, False otherwise. 
783 """ 784 environment = environment or self.config.default_target_environment 785 environment = Environment.sanitize_name(environment) 786 if not skip_janitor and environment.lower() == c.PROD: 787 self._run_janitor() 788 789 self.notification_target_manager.notify( 790 NotificationEvent.RUN_START, environment=environment 791 ) 792 analytics_run_id = analytics.collector.on_run_start( 793 engine_type=self.snapshot_evaluator.adapter.dialect, 794 state_sync_type=self.state_sync.state_type(), 795 ) 796 self._load_materializations() 797 798 env_check_attempts_num = max( 799 1, 800 self.config.run.environment_check_max_wait 801 // self.config.run.environment_check_interval, 802 ) 803 804 def _block_until_finalized() -> str: 805 for _ in range(env_check_attempts_num): 806 assert environment is not None # mypy 807 environment_state = self.state_sync.get_environment(environment) 808 if not environment_state: 809 raise SQLMeshError(f"Environment '{environment}' was not found.") 810 if environment_state.finalized_ts: 811 return environment_state.plan_id 812 self.console.log_warning( 813 f"Environment '{environment}' is being updated by plan '{environment_state.plan_id}'. " 814 f"Retrying in {self.config.run.environment_check_interval} seconds..." 815 ) 816 time.sleep(self.config.run.environment_check_interval) 817 raise SQLMeshError( 818 f"Exceeded the maximum wait time for environment '{environment}' to be ready. " 819 "This means that the environment either failed to update or the update is taking longer than expected. " 820 "See https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#run to adjust the timeout settings." 
821 ) 822 823 success = False 824 interrupted = False 825 done = False 826 while not done: 827 plan_id_at_start = _block_until_finalized() 828 829 def _has_environment_changed() -> bool: 830 assert environment is not None # mypy 831 current_environment_state = self.state_sync.get_environment(environment) 832 return ( 833 not current_environment_state 834 or current_environment_state.plan_id != plan_id_at_start 835 or not current_environment_state.finalized_ts 836 ) 837 838 try: 839 completion_status = self._run( 840 environment, 841 start=start, 842 end=end, 843 execution_time=execution_time, 844 ignore_cron=ignore_cron, 845 select_models=select_models, 846 circuit_breaker=_has_environment_changed, 847 no_auto_upstream=no_auto_upstream, 848 ) 849 done = True 850 except CircuitBreakerError: 851 self.console.log_warning( 852 f"Environment '{environment}' modified while running. Restarting the run..." 853 ) 854 if exit_on_env_update: 855 interrupted = True 856 done = True 857 except Exception as e: 858 self.notification_target_manager.notify( 859 NotificationEvent.RUN_FAILURE, traceback.format_exc() 860 ) 861 logger.info("Run failed.", exc_info=e) 862 analytics.collector.on_run_end( 863 run_id=analytics_run_id, succeeded=False, interrupted=False, error=e 864 ) 865 raise e 866 867 if completion_status.is_success or interrupted: 868 self.notification_target_manager.notify( 869 NotificationEvent.RUN_END, environment=environment 870 ) 871 self.console.log_success(f"Run finished for environment '{environment}'") 872 elif completion_status.is_failure: 873 self.notification_target_manager.notify( 874 NotificationEvent.RUN_FAILURE, "See console logs for details." 
@python_api_analytics
def run_janitor(self, ignore_ttl: bool) -> bool:
    """Runs the janitor once the console has agreed to start the cleanup.

    Args:
        ignore_ttl: Passed through to both the console prompt and the janitor itself.

    Returns:
        True if the janitor ran to completion. False if the console declined to start
        the cleanup, in which case the janitor is not run and the console is not notified.
    """
    if not self.console.start_cleanup(ignore_ttl):
        return False

    completed = False
    try:
        self._run_janitor(ignore_ttl)
        completed = True
    finally:
        # The console is always told how the cleanup ended, even when the janitor raised.
        self.console.stop_cleanup(success=completed)

    return completed
@t.overload
def get_model(
    self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: Literal[True] = True
) -> Model: ...

@t.overload
def get_model(
    self,
    model_or_snapshot: ModelOrSnapshot,
    raise_if_missing: Literal[False] = False,
) -> t.Optional[Model]: ...

def get_model(
    self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: bool = False
) -> t.Optional[Model]:
    """Returns a model with the given name or None if a model with such name doesn't exist.

    Args:
        model_or_snapshot: A model name, model, or snapshot.
        raise_if_missing: Raises an error if a model is not found.

    Returns:
        The snapshot's model when a snapshot is given, the argument itself when it is
        neither a snapshot nor a string, otherwise the registered model whose normalized
        name matches. None if nothing matches and `raise_if_missing` is not set.

    Raises:
        SQLMeshError: If no model matches the given name and `raise_if_missing` is set.
    """
    if isinstance(model_or_snapshot, Snapshot):
        return model_or_snapshot.model
    if not isinstance(model_or_snapshot, str):
        return model_or_snapshot

    # We should try all dialects referenced in the project for cases when models use mixed dialects.
    for dialect in self._all_dialects:
        try:
            normalized_name = normalize_model_name(
                model_or_snapshot,
                dialect=dialect,
                default_catalog=self.default_catalog,
            )
        except Exception:
            # A name that cannot be normalized under one dialect may still be valid under
            # another, so keep going instead of abandoning the whole lookup. Only
            # `Exception` is caught so that KeyboardInterrupt / SystemExit still propagate.
            logger.debug(
                "Failed to normalize model name '%s' using dialect '%s'",
                model_or_snapshot,
                dialect,
                exc_info=True,
            )
            continue

        if normalized_name in self._models:
            return self._models[normalized_name]

    if raise_if_missing:
        if model_or_snapshot.endswith((".sql", ".py")):
            msg = "Resolving models by path is not supported, please pass in the model name instead."
        else:
            msg = f"Cannot find model with name '{model_or_snapshot}'"

        raise SQLMeshError(msg)

    return None
@property
def metrics(self) -> MappingProxyType[str, Metric]:
    """Read-only mapping view over the metrics registered in this context."""
    registered_metrics = self._metrics
    return MappingProxyType(registered_metrics)

@property
def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]:
    """Read-only mapping view over the standalone audits registered in this context."""
    registered_audits = self._standalone_audits
    return MappingProxyType(registered_audits)

@property
def models_with_tests(self) -> t.Set[str]:
    """The set of models with tests in this context (the underlying set, not a copy)."""
    tested_models = self._models_with_tests
    return tested_models

@property
def snapshots(self) -> t.Dict[str, Snapshot]:
    """Generates and returns snapshots based on models registered in this context.

    Every access calls `self._snapshots()` again. If one of the snapshots has been
    previously stored in the persisted state, the stored instance will be returned.
    """
    generated = self._snapshots()
    return generated

@property
def requirements(self) -> t.Dict[str, str]:
    """A copy of the Python dependencies of the project loaded in this context."""
    requirements_copy = self._requirements.copy()
    return requirements_copy

@cached_property
def default_catalog(self) -> t.Optional[str]:
    """The default catalog registered for the selected gateway, or None if there is none.

    Computed on first access and cached on the instance afterwards.
    """
    catalogs_by_gateway = self.default_catalog_per_gateway
    return catalogs_by_gateway.get(self.selected_gateway)
@python_api_analytics
def evaluate(
    self,
    model_or_snapshot: ModelOrSnapshot,
    start: TimeLike,
    end: TimeLike,
    execution_time: TimeLike,
    limit: t.Optional[int] = None,
    **kwargs: t.Any,
) -> DF:
    """Evaluate a model or snapshot (running its query against a DB/Engine).

    This method is used to test or iterate on models without side effects.

    Args:
        model_or_snapshot: The model, model name, or snapshot to render.
        start: The start of the interval to evaluate.
        end: The end of the interval to evaluate.
        execution_time: The date/time time reference to use for execution time.
        limit: A limit applied to the model. A falsy value (None or 0) falls back to
            the default maximum limit.
        **kwargs: Accepted for interface compatibility; not used by this method.

    Returns:
        The dataframe fetched by the snapshot evaluator.

    Raises:
        SQLMeshError: If no snapshot exists for the given model or snapshot.
        RuntimeError: If the evaluator returns no dataframe.
    """
    # The `snapshots` property regenerates the mapping on every access, so resolve it once
    # and use the same mapping for the lookup, the parent expansion and the evaluator.
    snapshots = self.snapshots
    fqn = self._node_or_snapshot_to_fqn(model_or_snapshot)
    if fqn not in snapshots:
        raise SQLMeshError(f"Cannot find snapshot for '{fqn}'")
    snapshot = snapshots[fqn]

    # Expand all uncategorized parents since physical tables don't exist for them yet
    expand = [
        parent
        for parent in self.dag.upstream(snapshot.model.fqn)
        if (parent_snapshot := snapshots.get(parent))
        and parent_snapshot.is_model
        and parent_snapshot.model.is_sql
        and not parent_snapshot.categorized
    ]

    df = self.snapshot_evaluator.evaluate_and_fetch(
        snapshot,
        start=start,
        end=end,
        execution_time=execution_time,
        snapshots=snapshots,
        limit=limit or c.DEFAULT_MAX_LIMIT,
        expand=expand,
    )

    if df is None:
        raise RuntimeError(f"Error evaluating {snapshot.name}")

    return df
def _format(
    self,
    target: Model | Audit,
    before: str,
    *,
    transpile: t.Optional[str] = None,
    rewrite_casts: t.Optional[bool] = None,
    append_newline: t.Optional[bool] = None,
    **kwargs: t.Any,
) -> str:
    """Formats the SQL text of a single model or audit and returns the result.

    Args:
        target: The model or audit the text belongs to; selects the project config to use.
        before: The current, unformatted text of the definition.
        transpile: Dialect to generate the output in. When set, and the first parsed
            expression is a meta expression, its `dialect` property is rewritten to match.
            When not set, the target's own dialect is used.
        rewrite_casts: Explicit override for cast rewriting. When None, the inverse of the
            format config's `no_rewrite_casts` is used.
        append_newline: Whether to end the output with a newline. When None, the format
            config's `append_newline` is used.
        **kwargs: Extra generator options; these take precedence over the options from
            the format config.

    Returns:
        The formatted text.
    """
    node_config = self.config_for_node(target)
    format_config = node_config.format

    expressions = parse(before, default_dialect=node_config.dialect)

    if transpile and is_meta_expression(expressions[0]):
        for prop in expressions[0].expressions:
            if prop.name.lower() != "dialect":
                continue
            # `transpile` is known to be truthy here, so it is always the new dialect value.
            new_dialect = exp.Literal.string(transpile)
            prop.replace(exp.Property(this="dialect", value=new_dialect))

    output_dialect = transpile or target.dialect

    if rewrite_casts is not None:
        should_rewrite_casts = rewrite_casts
    else:
        should_rewrite_casts = not format_config.no_rewrite_casts

    generator_options = {**format_config.generator_options, **kwargs}

    after = format_model_expressions(
        expressions,
        output_dialect,
        rewrite_casts=should_rewrite_casts,
        **generator_options,
    )

    if append_newline is None:
        append_newline = format_config.append_newline

    return after + "\n" if append_newline else after
start: t.Optional[TimeLike] = None, 1316 end: t.Optional[TimeLike] = None, 1317 execution_time: t.Optional[TimeLike] = None, 1318 create_from: t.Optional[str] = None, 1319 skip_tests: t.Optional[bool] = None, 1320 restate_models: t.Optional[t.Iterable[str]] = None, 1321 no_gaps: t.Optional[bool] = None, 1322 skip_backfill: t.Optional[bool] = None, 1323 empty_backfill: t.Optional[bool] = None, 1324 forward_only: t.Optional[bool] = None, 1325 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1326 allow_additive_models: t.Optional[t.Collection[str]] = None, 1327 no_prompts: t.Optional[bool] = None, 1328 auto_apply: t.Optional[bool] = None, 1329 no_auto_categorization: t.Optional[bool] = None, 1330 effective_from: t.Optional[TimeLike] = None, 1331 include_unmodified: t.Optional[bool] = None, 1332 select_models: t.Optional[t.Collection[str]] = None, 1333 backfill_models: t.Optional[t.Collection[str]] = None, 1334 categorizer_config: t.Optional[CategorizerConfig] = None, 1335 enable_preview: t.Optional[bool] = None, 1336 no_diff: t.Optional[bool] = None, 1337 run: t.Optional[bool] = None, 1338 diff_rendered: t.Optional[bool] = None, 1339 skip_linter: t.Optional[bool] = None, 1340 explain: t.Optional[bool] = None, 1341 ignore_cron: t.Optional[bool] = None, 1342 min_intervals: t.Optional[int] = None, 1343 ) -> Plan: 1344 """Interactively creates a plan. 1345 1346 This method compares the current context with the target environment. It then presents 1347 the differences and asks whether to backfill each modified model. 1348 1349 Args: 1350 environment: The environment to diff and plan against. 1351 start: The start date of the backfill if there is one. 1352 end: The end date of the backfill if there is one. 1353 execution_time: The date/time reference to use for execution time. Defaults to now. 1354 create_from: The environment to create the target environment from if it 1355 doesn't exist. If not specified, the "prod" environment will be used. 
1356 skip_tests: Unit tests are run by default so this will skip them if enabled 1357 restate_models: A list of either internal or external models, or tags, that need to be restated 1358 for the given plan interval. If the target environment is a production environment, 1359 ALL snapshots that depended on these upstream tables will have their intervals deleted 1360 (even ones not in this current environment). Only the snapshots in this environment will 1361 be backfilled whereas others need to be recovered on a future plan application. For development 1362 environments only snapshots that are part of this plan will be affected. 1363 no_gaps: Whether to ensure that new snapshots for models that are already a 1364 part of the target environment have no data gaps when compared against previous 1365 snapshots for same models. 1366 skip_backfill: Whether to skip the backfill step. Default: False. 1367 empty_backfill: Like skip_backfill, but also records processed intervals. 1368 forward_only: Whether the purpose of the plan is to make forward only changes. 1369 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1370 allow_additive_models: Models whose forward-only changes are allowed to be additive. 1371 no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that 1372 if this flag is set to true and there are uncategorized changes the plan creation will 1373 fail. Default: False. 1374 auto_apply: Whether to automatically apply the new plan after creation. Default: False. 1375 no_auto_categorization: Indicates whether to disable automatic categorization of model 1376 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1377 option determines the behavior. 1378 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1379 project config by default. 
1380 effective_from: The effective date from which to apply forward-only changes on production. 1381 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1382 select_models: A list of model selection strings to filter the models that should be included into this plan. 1383 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1384 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1385 no_diff: Hide text differences for changed models. 1386 run: Whether to run latest intervals as part of the plan application. 1387 diff_rendered: Whether the diff should compare raw vs rendered models 1388 skip_linter: Linter runs by default so this will skip it if enabled 1389 explain: Whether to explain the plan instead of applying it. 1390 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1391 on every model when checking for missing intervals 1392 1393 Returns: 1394 The populated Plan object. 
1395 """ 1396 plan_builder = self.plan_builder( 1397 environment, 1398 start=start, 1399 end=end, 1400 execution_time=execution_time, 1401 create_from=create_from, 1402 skip_tests=skip_tests, 1403 restate_models=restate_models, 1404 no_gaps=no_gaps, 1405 skip_backfill=skip_backfill, 1406 empty_backfill=empty_backfill, 1407 forward_only=forward_only, 1408 allow_destructive_models=allow_destructive_models, 1409 allow_additive_models=allow_additive_models, 1410 no_auto_categorization=no_auto_categorization, 1411 effective_from=effective_from, 1412 include_unmodified=include_unmodified, 1413 select_models=select_models, 1414 backfill_models=backfill_models, 1415 categorizer_config=categorizer_config, 1416 enable_preview=enable_preview, 1417 run=run, 1418 diff_rendered=diff_rendered, 1419 skip_linter=skip_linter, 1420 explain=explain, 1421 ignore_cron=ignore_cron, 1422 min_intervals=min_intervals, 1423 ) 1424 1425 plan = plan_builder.build() 1426 1427 if no_auto_categorization or plan.uncategorized: 1428 # Prompts are required if the auto categorization is disabled 1429 # or if there are any uncategorized snapshots in the plan 1430 no_prompts = False 1431 1432 if explain: 1433 auto_apply = True 1434 1435 self.console.plan( 1436 plan_builder, 1437 auto_apply if auto_apply is not None else self.config.plan.auto_apply, 1438 self.default_catalog, 1439 no_diff=no_diff if no_diff is not None else self.config.plan.no_diff, 1440 no_prompts=no_prompts if no_prompts is not None else self.config.plan.no_prompts, 1441 ) 1442 1443 return plan 1444 1445 @python_api_analytics 1446 def plan_builder( 1447 self, 1448 environment: t.Optional[str] = None, 1449 *, 1450 start: t.Optional[TimeLike] = None, 1451 end: t.Optional[TimeLike] = None, 1452 execution_time: t.Optional[TimeLike] = None, 1453 create_from: t.Optional[str] = None, 1454 skip_tests: t.Optional[bool] = None, 1455 restate_models: t.Optional[t.Iterable[str]] = None, 1456 no_gaps: t.Optional[bool] = None, 1457 skip_backfill: 
t.Optional[bool] = None, 1458 empty_backfill: t.Optional[bool] = None, 1459 forward_only: t.Optional[bool] = None, 1460 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1461 allow_additive_models: t.Optional[t.Collection[str]] = None, 1462 no_auto_categorization: t.Optional[bool] = None, 1463 effective_from: t.Optional[TimeLike] = None, 1464 include_unmodified: t.Optional[bool] = None, 1465 select_models: t.Optional[t.Collection[str]] = None, 1466 backfill_models: t.Optional[t.Collection[str]] = None, 1467 categorizer_config: t.Optional[CategorizerConfig] = None, 1468 enable_preview: t.Optional[bool] = None, 1469 run: t.Optional[bool] = None, 1470 diff_rendered: t.Optional[bool] = None, 1471 skip_linter: t.Optional[bool] = None, 1472 explain: t.Optional[bool] = None, 1473 ignore_cron: t.Optional[bool] = None, 1474 min_intervals: t.Optional[int] = None, 1475 always_include_local_changes: t.Optional[bool] = None, 1476 ) -> PlanBuilder: 1477 """Creates a plan builder. 1478 1479 Args: 1480 environment: The environment to diff and plan against. 1481 start: The start date of the backfill if there is one. 1482 end: The end date of the backfill if there is one. 1483 execution_time: The date/time reference to use for execution time. Defaults to now. 1484 create_from: The environment to create the target environment from if it 1485 doesn't exist. If not specified, the "prod" environment will be used. 1486 skip_tests: Unit tests are run by default so this will skip them if enabled 1487 restate_models: A list of either internal or external models, or tags, that need to be restated 1488 for the given plan interval. If the target environment is a production environment, 1489 ALL snapshots that depended on these upstream tables will have their intervals deleted 1490 (even ones not in this current environment). Only the snapshots in this environment will 1491 be backfilled whereas others need to be recovered on a future plan application. 
For development 1492 environments only snapshots that are part of this plan will be affected. 1493 no_gaps: Whether to ensure that new snapshots for models that are already a 1494 part of the target environment have no data gaps when compared against previous 1495 snapshots for same models. 1496 skip_backfill: Whether to skip the backfill step. Default: False. 1497 empty_backfill: Like skip_backfill, but also records processed intervals. 1498 forward_only: Whether the purpose of the plan is to make forward only changes. 1499 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1500 no_auto_categorization: Indicates whether to disable automatic categorization of model 1501 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1502 option determines the behavior. 1503 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1504 project config by default. 1505 effective_from: The effective date from which to apply forward-only changes on production. 1506 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1507 select_models: A list of model selection strings to filter the models that should be included into this plan. 1508 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1509 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1510 run: Whether to run latest intervals as part of the plan application. 1511 diff_rendered: Whether the diff should compare raw vs rendered models 1512 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1513 on every model when checking for missing intervals 1514 always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. 
1515 However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour. 1516 1517 Returns: 1518 The plan builder. 1519 """ 1520 kwargs: t.Dict[str, t.Optional[UserProvidedFlags]] = { 1521 "start": start, 1522 "end": end, 1523 "execution_time": execution_time, 1524 "create_from": create_from, 1525 "skip_tests": skip_tests, 1526 "restate_models": list(restate_models) if restate_models is not None else None, 1527 "no_gaps": no_gaps, 1528 "skip_backfill": skip_backfill, 1529 "empty_backfill": empty_backfill, 1530 "forward_only": forward_only, 1531 "allow_destructive_models": list(allow_destructive_models) 1532 if allow_destructive_models is not None 1533 else None, 1534 "allow_additive_models": list(allow_additive_models) 1535 if allow_additive_models is not None 1536 else None, 1537 "no_auto_categorization": no_auto_categorization, 1538 "effective_from": effective_from, 1539 "include_unmodified": include_unmodified, 1540 "select_models": list(select_models) if select_models is not None else None, 1541 "backfill_models": list(backfill_models) if backfill_models is not None else None, 1542 "enable_preview": enable_preview, 1543 "run": run, 1544 "diff_rendered": diff_rendered, 1545 "skip_linter": skip_linter, 1546 "min_intervals": min_intervals, 1547 } 1548 user_provided_flags: t.Dict[str, UserProvidedFlags] = { 1549 k: v for k, v in kwargs.items() if v is not None 1550 } 1551 1552 skip_tests = explain or skip_tests or False 1553 no_gaps = no_gaps or False 1554 skip_backfill = skip_backfill or False 1555 empty_backfill = empty_backfill or False 1556 run = run or False 1557 diff_rendered = diff_rendered or False 1558 skip_linter = skip_linter or False 1559 1560 environment = environment or self.config.default_target_environment 1561 environment = Environment.sanitize_name(environment) 1562 is_dev = environment != c.PROD 1563 1564 if include_unmodified is None: 1565 include_unmodified = 
self.config.plan.include_unmodified 1566 1567 if skip_backfill and not no_gaps and not is_dev: 1568 # note: we deliberately don't mention the --no-gaps flag in case the plan came from the sqlmesh_dbt command 1569 # todo: perhaps we could have better error messages if we check sys.argv[0] for which cli is running? 1570 self.console.log_warning( 1571 "Skipping the backfill stage for production can lead to unexpected results, such as tables being empty or incremental data with non-contiguous time ranges being made available.\n" 1572 "If you are doing this deliberately to create an empty version of a table to test a change, please consider using Virtual Data Environments instead." 1573 ) 1574 1575 if not skip_linter: 1576 self.lint_models() 1577 1578 self._run_plan_tests(skip_tests=skip_tests) 1579 1580 environment_ttl = ( 1581 self.environment_ttl if environment not in self.pinned_environments else None 1582 ) 1583 1584 model_selector = self._new_selector() 1585 1586 if allow_destructive_models: 1587 expanded_destructive_models = model_selector.expand_model_selections( 1588 allow_destructive_models 1589 ) 1590 else: 1591 expanded_destructive_models = None 1592 1593 if allow_additive_models: 1594 expanded_additive_models = model_selector.expand_model_selections(allow_additive_models) 1595 else: 1596 expanded_additive_models = None 1597 1598 if backfill_models: 1599 backfill_models = model_selector.expand_model_selections(backfill_models) 1600 else: 1601 backfill_models = None 1602 1603 models_override: t.Optional[UniqueKeyDict[str, Model]] = None 1604 if select_models: 1605 try: 1606 models_override = model_selector.select_models( 1607 select_models, 1608 environment, 1609 fallback_env_name=create_from or c.PROD, 1610 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1611 ) 1612 except SQLMeshError as e: 1613 logger.exception(e) # ensure the full stack trace is logged 1614 raise PlanError( 1615 f"{e}\nCheck the SQLMesh log file for the full stack 
trace.\nIf the model has been fixed locally, please ensure that the --select-model expression includes it." 1616 ) 1617 if not backfill_models: 1618 # Only backfill selected models unless explicitly specified. 1619 backfill_models = model_selector.expand_model_selections(select_models) 1620 1621 expanded_restate_models = None 1622 if restate_models is not None: 1623 expanded_restate_models = model_selector.expand_model_selections(restate_models) 1624 1625 if (restate_models is not None and not expanded_restate_models) or ( 1626 backfill_models is not None and not backfill_models 1627 ): 1628 raise PlanError( 1629 "Selector did not return any models. Please check your model selection and try again." 1630 ) 1631 1632 if always_include_local_changes is None: 1633 # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes 1634 force_no_diff = restate_models is not None or ( 1635 backfill_models is not None and not backfill_models 1636 ) 1637 else: 1638 force_no_diff = not always_include_local_changes 1639 1640 snapshots = self._snapshots(models_override) 1641 context_diff = self._context_diff( 1642 environment or c.PROD, 1643 snapshots=snapshots, 1644 create_from=create_from, 1645 force_no_diff=force_no_diff, 1646 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1647 diff_rendered=diff_rendered, 1648 always_recreate_environment=self.config.plan.always_recreate_environment, 1649 ) 1650 modified_model_names = { 1651 *context_diff.modified_snapshots, 1652 *[s.name for s in context_diff.added], 1653 } 1654 1655 if ( 1656 is_dev 1657 and not include_unmodified 1658 and backfill_models is None 1659 and expanded_restate_models is None 1660 ): 1661 # Only backfill modified and added models. 1662 # This ensures that no models outside the impacted sub-DAG(s) will be backfilled unexpectedly. 
1663 backfill_models = modified_model_names or None 1664 1665 max_interval_end_per_model = None 1666 default_start, default_end = None, None 1667 if not run: 1668 ignore_cron = False 1669 max_interval_end_per_model = self._get_max_interval_end_per_model( 1670 snapshots, backfill_models 1671 ) 1672 # If no end date is specified, use the max interval end from prod 1673 # to prevent unintended evaluation of the entire DAG. 1674 default_start, default_end = self._get_plan_default_start_end( 1675 snapshots, 1676 max_interval_end_per_model, 1677 backfill_models, 1678 modified_model_names, 1679 execution_time or now(), 1680 ) 1681 1682 # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model. 1683 self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values()) 1684 1685 start_override_per_model = self._calculate_start_override_per_model( 1686 min_intervals, 1687 start or default_start, 1688 end or default_end, 1689 execution_time or now(), 1690 backfill_models, 1691 snapshots, 1692 max_interval_end_per_model, 1693 ) 1694 1695 if not self.config.virtual_environment_mode.is_full: 1696 forward_only = True 1697 elif forward_only is None: 1698 forward_only = self.config.plan.forward_only 1699 1700 # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments 1701 # If we are not, then there is no point, because none of the data in dev environments can be promoted by definition 1702 restate_all_snapshots = ( 1703 expanded_restate_models is not None 1704 and not is_dev 1705 and self.config.virtual_environment_mode.is_full 1706 ) 1707 1708 return self.PLAN_BUILDER_TYPE( 1709 context_diff=context_diff, 1710 start=start, 1711 end=end, 1712 execution_time=execution_time, 1713 apply=self.apply, 1714 restate_models=expanded_restate_models, 1715 restate_all_snapshots=restate_all_snapshots, 1716 backfill_models=backfill_models, 1717 no_gaps=no_gaps, 
def apply(
    self,
    plan: Plan,
    circuit_breaker: t.Optional[t.Callable[[], bool]] = None,
) -> None:
    """Applies a plan by pushing snapshots and backfilling data.

    A plan that has no changes, requires no backfill and has no unmodified unpromoted
    snapshots is ignored. A plan flagged with `explain` is only handed to a `PlanExplainer`
    and is not executed. Otherwise the notification targets are informed when the
    application starts and when it either fails or ends.

    Args:
        plan: The plan to apply.
        circuit_breaker: An optional handler which checks if the apply should be aborted.

    Raises:
        UncategorizedPlanError: If the plan still contains uncategorized changes.
    """
    nothing_to_apply = not (
        plan.context_diff.has_changes
        or plan.requires_backfill
        or plan.has_unmodified_unpromoted
    )
    if nothing_to_apply:
        return

    if plan.uncategorized:
        raise UncategorizedPlanError("Can't apply a plan with uncategorized changes.")

    if plan.explain:
        PlanExplainer(
            state_reader=self.state_reader,
            default_catalog=self.default_catalog,
            console=self.console,
        ).evaluate(plan.to_evaluatable())
        return

    def _notify(event: NotificationEvent, **extra: t.Any) -> None:
        # Every apply notification carries the same environment / plan identification.
        self.notification_target_manager.notify(
            event,
            environment=plan.environment_naming_info.name,
            plan_id=plan.plan_id,
            **extra,
        )

    _notify(NotificationEvent.APPLY_START)
    try:
        self._apply(plan, circuit_breaker)
    except Exception as e:
        _notify(NotificationEvent.APPLY_FAILURE, exc=traceback.format_exc())
        logger.info("Plan application failed.", exc_info=e)
        raise e
    _notify(NotificationEvent.APPLY_END)
@python_api_analytics
def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> bool:
    """Show a diff of the current context with a given environment.

    Args:
        environment: The environment to diff against. The configured default target
            environment is used when this is not provided.
        detailed: Show the actual SQL differences if True.

    Returns:
        True if there are changes, False otherwise.
    """
    env_name = Environment.sanitize_name(
        environment or self.config.default_target_environment
    )
    changes = self._context_diff(env_name)
    hide_sql = not detailed

    self.console.show_environment_difference_summary(changes, no_diff=hide_sql)

    if changes.has_changes:
        # The per-model summary is only printed when there is something to report.
        naming_info = EnvironmentNamingInfo.from_environment_catalog_mapping(
            self.environment_catalog_mapping,
            name=env_name,
            suffix_target=self.config.environment_suffix_target,
            normalize_name=changes.normalize_environment_name,
        )
        self.console.show_model_difference_summary(
            changes,
            naming_info,
            self.default_catalog,
            no_diff=hide_sql,
        )

    return changes.has_changes
False, 1871 **kwargs: t.Any, # catch-all to prevent an 'unexpected keyword argument' error if an table_diff extension passes in some extra arguments 1872 ) -> t.List[TableDiff]: 1873 """Show a diff between two tables. 1874 1875 Args: 1876 source: The source environment or table. 1877 target: The target environment or table. 1878 on: The join condition, table aliases must be "s" and "t" for source and target. 1879 If omitted, the table's grain will be used. 1880 skip_columns: The columns to skip when computing the table diff. 1881 select_models: The models or snapshots to use when environments are passed in. 1882 where: An optional where statement to filter results. 1883 limit: The limit of the sample dataframe. 1884 show: Show the table diff output in the console. 1885 show_sample: Show the sample dataframe in the console. Requires show=True. 1886 decimals: The number of decimal places to keep when comparing floating point columns. 1887 skip_grain_check: Skip check for rows that contain null or duplicate grains. 1888 temp_schema: The schema to use for temporary tables. 1889 1890 Returns: 1891 The list of TableDiff objects containing schema and summary differences. 1892 """ 1893 1894 if "|" in source or "|" in target: 1895 raise ConfigError( 1896 "Cross-database table diffing is available in Tobiko Cloud. 
Read more here: " 1897 "https://sqlmesh.readthedocs.io/en/stable/guides/tablediff/#diffing-tables-or-views-across-gateways" 1898 ) 1899 1900 table_diffs: t.List[TableDiff] = [] 1901 1902 # Diffs multiple or a single model across two environments 1903 if select_models: 1904 source_env = self.state_reader.get_environment(source) 1905 target_env = self.state_reader.get_environment(target) 1906 if not source_env: 1907 raise SQLMeshError(f"Could not find environment '{source}'") 1908 if not target_env: 1909 raise SQLMeshError(f"Could not find environment '{target}'") 1910 criteria = ", ".join(f"'{c}'" for c in select_models) 1911 try: 1912 selected_models = self._new_selector().expand_model_selections(select_models) 1913 if not selected_models: 1914 self.console.log_status_update( 1915 f"No models matched the selection criteria: {criteria}" 1916 ) 1917 except Exception as e: 1918 raise SQLMeshError(e) 1919 1920 models_to_diff: t.List[ 1921 t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Condition]] 1922 ] = [] 1923 models_without_grain: t.List[Model] = [] 1924 source_snapshots_to_name = { 1925 snapshot.name: snapshot for snapshot in source_env.snapshots 1926 } 1927 target_snapshots_to_name = { 1928 snapshot.name: snapshot for snapshot in target_env.snapshots 1929 } 1930 1931 for model_fqn in selected_models: 1932 model = self._models[model_fqn] 1933 adapter = self._get_engine_adapter(model.gateway) 1934 source_snapshot = source_snapshots_to_name.get(model.fqn) 1935 target_snapshot = target_snapshots_to_name.get(model.fqn) 1936 1937 if target_snapshot and source_snapshot: 1938 if (source_snapshot.fingerprint != target_snapshot.fingerprint) and ( 1939 (source_snapshot.version != target_snapshot.version) 1940 or source_snapshot.is_forward_only 1941 ): 1942 # Compare the virtual layer instead of the physical layer because the virtual layer is guaranteed to point 1943 # to the correct/active snapshot for the model in the specified environment, taking 
into account things like dev previews 1944 source = source_snapshot.qualified_view_name.for_environment( 1945 source_env.naming_info, adapter.dialect 1946 ) 1947 target = target_snapshot.qualified_view_name.for_environment( 1948 target_env.naming_info, adapter.dialect 1949 ) 1950 model_on = on or model.on 1951 if not model_on: 1952 models_without_grain.append(model) 1953 else: 1954 models_to_diff.append((model, adapter, source, target, model_on)) 1955 1956 if models_without_grain: 1957 model_names = "\n".join( 1958 f"─ {model.name} \n at '{model._path}'" for model in models_without_grain 1959 ) 1960 message = ( 1961 "SQLMesh doesn't know how to join the tables for the following models:\n" 1962 f"{model_names}\n\n" 1963 "Please specify a `grain` in each model definition. It must be unique and not null." 1964 ) 1965 if warn_grain_check: 1966 self.console.log_warning(message) 1967 else: 1968 raise SQLMeshError(message) 1969 1970 if models_to_diff: 1971 self.console.show_table_diff_details( 1972 [model[0].name for model in models_to_diff], 1973 ) 1974 1975 self.console.start_table_diff_progress(len(models_to_diff)) 1976 try: 1977 tasks_num = min(len(models_to_diff), self.concurrent_tasks) 1978 table_diffs = concurrent_apply_to_values( 1979 list(models_to_diff), 1980 lambda model_info: self._model_diff( 1981 model=model_info[0], 1982 adapter=model_info[1], 1983 source=model_info[2], 1984 target=model_info[3], 1985 on=model_info[4], 1986 source_alias=source_env.name, 1987 target_alias=target_env.name, 1988 limit=limit, 1989 decimals=decimals, 1990 skip_columns=skip_columns, 1991 where=where, 1992 show=show, 1993 temp_schema=temp_schema, 1994 skip_grain_check=skip_grain_check, 1995 schema_diff_ignore_case=schema_diff_ignore_case, 1996 ), 1997 tasks_num=tasks_num, 1998 ) 1999 self.console.stop_table_diff_progress(success=True) 2000 except: 2001 self.console.stop_table_diff_progress(success=False) 2002 raise 2003 elif selected_models: 2004 self.console.log_status_update( 
2005 f"No models contain differences with the selection criteria: {criteria}" 2006 ) 2007 2008 else: 2009 table_diffs = [ 2010 self._table_diff( 2011 source=source, 2012 target=target, 2013 source_alias=source, 2014 target_alias=target, 2015 limit=limit, 2016 decimals=decimals, 2017 adapter=self.engine_adapter, 2018 on=on, 2019 skip_columns=skip_columns, 2020 where=where, 2021 schema_diff_ignore_case=schema_diff_ignore_case, 2022 ) 2023 ] 2024 2025 if show: 2026 self.console.show_table_diff(table_diffs, show_sample, skip_grain_check, temp_schema) 2027 2028 return table_diffs 2029 2030 def _model_diff( 2031 self, 2032 model: Model, 2033 adapter: EngineAdapter, 2034 source: str, 2035 target: str, 2036 source_alias: str, 2037 target_alias: str, 2038 limit: int, 2039 decimals: int, 2040 on: t.Optional[t.List[str] | exp.Condition] = None, 2041 skip_columns: t.Optional[t.List[str]] = None, 2042 where: t.Optional[str | exp.Condition] = None, 2043 show: bool = True, 2044 temp_schema: t.Optional[str] = None, 2045 skip_grain_check: bool = False, 2046 schema_diff_ignore_case: bool = False, 2047 ) -> TableDiff: 2048 self.console.start_table_diff_model_progress(model.name) 2049 2050 table_diff = self._table_diff( 2051 on=on, 2052 skip_columns=skip_columns, 2053 where=where, 2054 limit=limit, 2055 decimals=decimals, 2056 model=model, 2057 adapter=adapter, 2058 source=source, 2059 target=target, 2060 source_alias=source_alias, 2061 target_alias=target_alias, 2062 schema_diff_ignore_case=schema_diff_ignore_case, 2063 ) 2064 2065 if show: 2066 # Trigger row_diff in parallel execution so it's available for ordered display later 2067 table_diff.row_diff(temp_schema=temp_schema, skip_grain_check=skip_grain_check) 2068 2069 self.console.update_table_diff_progress(model.name) 2070 2071 return table_diff 2072 2073 def _table_diff( 2074 self, 2075 source: str, 2076 target: str, 2077 source_alias: str, 2078 target_alias: str, 2079 limit: int, 2080 decimals: int, 2081 adapter: 
EngineAdapter, 2082 on: t.Optional[t.List[str] | exp.Condition] = None, 2083 model: t.Optional[Model] = None, 2084 skip_columns: t.Optional[t.List[str]] = None, 2085 where: t.Optional[str | exp.Condition] = None, 2086 schema_diff_ignore_case: bool = False, 2087 ) -> TableDiff: 2088 if not on: 2089 raise SQLMeshError( 2090 "SQLMesh doesn't know how to join the two tables. Specify the `grains` in each model definition or pass join column names in separate `-o` flags." 2091 ) 2092 2093 return TableDiff( 2094 adapter=adapter.with_settings(execute_log_level=logger.getEffectiveLevel()), 2095 source=source, 2096 target=target, 2097 on=on, 2098 skip_columns=skip_columns, 2099 where=where, 2100 source_alias=source_alias, 2101 target_alias=target_alias, 2102 limit=limit, 2103 decimals=decimals, 2104 model_name=model.name if model else None, 2105 model_dialect=model.dialect if model else None, 2106 schema_diff_ignore_case=schema_diff_ignore_case, 2107 ) 2108 2109 @python_api_analytics 2110 def get_dag( 2111 self, select_models: t.Optional[t.Collection[str]] = None, **options: t.Any 2112 ) -> GraphHTML: 2113 """Gets an HTML object representation of the DAG. 2114 2115 Args: 2116 select_models: A list of model selection strings that should be included in the dag. 2117 Returns: 2118 An html object that renders the dag. 
@python_api_analytics
def create_test(
    self,
    model: str,
    input_queries: t.Dict[str, str],
    overwrite: bool = False,
    variables: t.Optional[t.Dict[str, str]] = None,
    path: t.Optional[str] = None,
    name: t.Optional[str] = None,
    include_ctes: bool = False,
) -> None:
    """Generate a unit test fixture for a given model.

    Args:
        model: The model to test.
        input_queries: Mapping of model names to queries. Each model included in this mapping
            will be populated in the test based on the results of the corresponding query.
        overwrite: Whether to overwrite the existing test in case of a file path collision.
            When set to False, an error will be raised if there is such a collision.
        variables: Key-value pairs that will define variables needed by the model.
        path: The file path corresponding to the fixture, relative to the test directory.
            By default, the fixture will be created under the test directory and the file name
            will be inferred from the test's name.
        name: The name of the test. This is inferred from the model name by default.
        include_ctes: When true, CTE fixtures will also be generated.

    Raises:
        Any error raised by `get_model(..., raise_if_missing=True)` for the model or one of the
        input models, or by the test generation itself, is propagated unchanged.
    """
    input_queries = {
        # The get_model here has two purposes: return normalized names & check for missing deps
        self.get_model(dep, raise_if_missing=True).fqn: query
        for dep, query in input_queries.items()
    }

    # Bind the name before entering the `try`: if the model lookup or the adapter creation
    # fails, the `finally` clause must not trip over an unbound local and thereby replace the
    # original error with an UnboundLocalError.
    test_adapter: t.Optional[EngineAdapter] = None
    try:
        model_to_test = self.get_model(model, raise_if_missing=True)
        test_adapter = self.test_connection_config.create_engine_adapter(
            register_comments_override=False
        )

        generate_test(
            model=model_to_test,
            input_queries=input_queries,
            models=self._models,
            engine_adapter=self._get_engine_adapter(model_to_test.gateway),
            test_engine_adapter=test_adapter,
            project_path=self.path,
            overwrite=overwrite,
            variables=variables,
            path=path,
            name=name,
            include_ctes=include_ctes,
        )
    finally:
        # Only an adapter that was actually created needs to be released.
        if test_adapter is not None:
            test_adapter.close()
@python_api_analytics
def audit(
    self,
    start: TimeLike,
    end: TimeLike,
    *,
    models: t.Optional[t.Iterator[str]] = None,
    execution_time: t.Optional[TimeLike] = None,
) -> bool:
    """Audit models.

    Every audit outcome is logged to the console as skipped, failed or passed. After all
    audits ran, a summary is printed followed by the details (and the query, if available) of
    each failed audit.

    Args:
        start: The start of the interval to audit.
        end: The end of the interval to audit.
        models: The models to audit. All models will be audited if not specified.
        execution_time: The date/time time reference to use for execution time. Defaults to now.

    Returns:
        False if any of the audits failed, True otherwise.
    """
    targets: t.Iterable[Snapshot]
    if models:
        targets = [self.get_snapshot(model, raise_if_missing=True) for model in models]
    else:
        targets = self.snapshots.values()

    total_audits = sum(len(target.node.audits_with_args) for target in targets)
    self.console.log_status_update(f"Found {total_audits} audit(s).")

    failures = []
    skipped = 0
    for target in targets:
        outcomes = self.snapshot_evaluator.audit(
            snapshot=target,
            start=start,
            end=end,
            execution_time=execution_time,
            snapshots=self.snapshots,
        )
        for outcome in outcomes:
            label = f"{outcome.audit.name}"
            if outcome.model:
                label += f" on model {outcome.model.name}"

            if outcome.skipped:
                self.console.log_status_update(f"{label} ⏸️ SKIPPED.")
                skipped += 1
                continue

            if outcome.count:
                # A non-zero count means the audit query returned offending rows.
                failures.append(outcome)
                self.console.log_status_update(
                    f"{label} ❌ [red]FAIL [{outcome.count}][/red]."
                )
                continue

            self.console.log_status_update(f"{label} ✅ [green]PASS[/green].")

    failure_total = len(failures)
    self.console.log_status_update(
        f"\nFinished with {failure_total} audit error{'' if failure_total == 1 else 's'} "
        f"and {skipped} audit{'' if skipped == 1 else 's'} skipped."
    )

    for failure in failures:
        self.console.log_status_update(
            f"\nFailure in audit {failure.audit.name} ({failure.audit._path})."
        )
        self.console.log_status_update(f"Got {failure.count} results, expected 0.")
        if failure.query:
            self.console.show_sql(
                f"{failure.query.sql(dialect=self.snapshot_evaluator.adapter.dialect)}"
            )

    self.console.log_status_update("Done.")
    return not failures
@python_api_analytics
def check_intervals(
    self,
    environment: t.Optional[str],
    no_signals: bool,
    select_models: t.Collection[str],
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
) -> t.Dict[Snapshot, SnapshotIntervals]:
    """Check intervals for a given environment.

    Args:
        environment: The environment or prod if None.
        no_signals: If True, the missing intervals are reported as they are. Otherwise they
            are first passed through each snapshot's `check_ready_intervals`.
        select_models: A list of model selection strings to show intervals for. All snapshots
            of the environment are included when this is empty.
        start: The start of the intervals to check.
        end: The end of the intervals to check. It is also used as the execution time when
            computing the missing intervals.

    Returns:
        A mapping of each selected snapshot to its intervals.

    Raises:
        SQLMeshError: If the environment does not exist.
    """
    env_name = environment or c.PROD
    target_env = self.state_reader.get_environment(env_name)
    if not target_env:
        raise SQLMeshError(f"Environment '{env_name}' was not found.")

    snapshots_by_name = {
        key.name: snapshot
        for key, snapshot in self.state_sync.get_snapshots(target_env.snapshots).items()
    }

    missing_by_name = {
        key.name: intervals
        for key, intervals in missing_intervals(
            snapshots_by_name.values(), start=start, end=end, execution_time=end
        ).items()
    }

    selected_names: t.Collection[str]
    if select_models:
        selected_names = self._select_models_for_run(
            select_models, True, snapshots_by_name.values()
        )
    else:
        selected_names = snapshots_by_name.keys()

    execution_context = self.execution_context(snapshots=snapshots_by_name)

    results = {}
    for selected_name in selected_names:
        snapshot = snapshots_by_name[selected_name]
        intervals = missing_by_name.get(selected_name) or []
        if not no_signals:
            intervals = snapshot.check_ready_intervals(intervals, execution_context)
        results[snapshot] = SnapshotIntervals(snapshot.snapshot_id, intervals)

    return results
@python_api_analytics
def create_external_models(self, strict: bool = False) -> None:
    """Create a file to document the schema of external models.

    The external models file contains all columns and types of external models, allowing for more
    robust lineage, validation, and optimizations. One file is written per configured project
    path, covering the models that belong to that project's config.

    Args:
        strict: If True, raise an error if the external model is missing in the database.
    """
    if not self._models:
        self.load(update_schemas=False)

    for project_path, project_config in self.configs.items():
        # Keep writing to the deprecated file name if the project already has one.
        legacy_yaml = project_path / c.EXTERNAL_MODELS_DEPRECATED_YAML
        if legacy_yaml.exists():
            target_yaml = legacy_yaml
        else:
            target_yaml = project_path / c.EXTERNAL_MODELS_YAML

        # An empty gateway can happen if there was no --gateway defined and the default_gateway
        # is ''. That means the single gateway syntax is being used, so there is no named
        # gateway and `gateway:` should not be stamped on the external models.
        gateway_name: t.Optional[str] = (self.gateway or self.config.default_gateway) or None

        project_models = {
            fqn: model
            for fqn, model in self._models.items()
            if self.config_for_node(model) is project_config
        }

        create_external_models_file(
            path=target_yaml,
            models=UniqueKeyDict("models", project_models),
            adapter=self.engine_adapter,
            state_reader=self.state_reader,
            dialect=project_config.model_defaults.dialect,
            gateway=gateway_name,
            max_workers=self.concurrent_tasks,
            strict=strict,
        )
@python_api_analytics
def print_environment_names(self) -> None:
    """Prints all environment names along with expiry datetime.

    Raises:
        SQLMeshError: If the state sync reports no environments.
    """
    environments = self._new_state_sync().get_environments_summary()
    if environments:
        self.console.print_environments(environments)
        return

    raise SQLMeshError(
        "This project has no environments. Create an environment using the `sqlmesh plan` command."
    )
Please wait until a model `cron` interval has elapsed.{next_run_ready_msg}" 2577 ) 2578 2579 return completion_status 2580 2581 def _apply(self, plan: Plan, circuit_breaker: t.Optional[t.Callable[[], bool]]) -> None: 2582 self._scheduler.create_plan_evaluator(self).evaluate( 2583 plan.to_evaluatable(), circuit_breaker=circuit_breaker 2584 ) 2585 2586 @python_api_analytics 2587 def table_name( 2588 self, model_name: str, environment: t.Optional[str] = None, prod: bool = False 2589 ) -> str: 2590 """Returns the name of the pysical table for the given model name in the target environment. 2591 2592 Args: 2593 model_name: The name of the model. 2594 environment: The environment to source the model version from. 2595 prod: If True, return the name of the physical table that will be used in production for the model version 2596 promoted in the target environment. 2597 2598 Returns: 2599 The name of the physical table. 2600 """ 2601 environment = environment or self.config.default_target_environment 2602 fqn = self._node_or_snapshot_to_fqn(model_name) 2603 target_env = self.state_reader.get_environment(environment) 2604 if not target_env: 2605 raise SQLMeshError(f"Environment '{environment}' was not found.") 2606 2607 snapshot_info = None 2608 for s in target_env.snapshots: 2609 if s.name == fqn: 2610 snapshot_info = s 2611 break 2612 if not snapshot_info: 2613 raise SQLMeshError( 2614 f"Model '{model_name}' was not found in environment '{environment}'." 
def clear_caches(self) -> None:
    """Removes the cache directories from disk and clears the state sync cache.

    The cache directory under every configured project path as well as `self.cache_dir` are
    deleted if they exist. The state sync's cache is cleared only if the state sync is a
    `CachingStateSync`.
    """
    cache_dirs = [project_path / c.CACHE for project_path in self.configs]
    cache_dirs.append(self.cache_dir)

    if IS_WINDOWS:
        cache_dirs = list(map(fix_windows_path, cache_dirs))

    for cache_dir in cache_dirs:
        if not cache_dir.exists():
            continue
        rmtree(cache_dir)

    state_sync = self._state_sync
    if isinstance(state_sync, CachingStateSync):
        state_sync.clear_cache()
output_file=output_file) 2675 raise 2676 2677 def import_state(self, input_file: Path, clear: bool = False, confirm: bool = True) -> None: 2678 from sqlmesh.core.state_sync.export_import import import_state 2679 2680 if self.console.start_state_import( 2681 input_file=input_file, 2682 gateway=self.selected_gateway, 2683 state_connection_config=self._state_connection_config, 2684 clear=clear, 2685 confirm=confirm, 2686 ): 2687 try: 2688 import_state( 2689 state_sync=self.state_sync, 2690 input_file=input_file, 2691 clear=clear, 2692 console=self.console, 2693 ) 2694 self.console.stop_state_import(success=True, input_file=input_file) 2695 except: 2696 self.console.stop_state_import(success=False, input_file=input_file) 2697 raise 2698 2699 def _run_tests( 2700 self, verbosity: Verbosity = Verbosity.DEFAULT 2701 ) -> t.Tuple[ModelTextTestResult, str]: 2702 test_output_io = StringIO() 2703 result = self.test(stream=test_output_io, verbosity=verbosity) 2704 return result, test_output_io.getvalue() 2705 2706 def _run_plan_tests(self, skip_tests: bool = False) -> t.Optional[ModelTextTestResult]: 2707 if not skip_tests: 2708 result = self.test() 2709 if not result.wasSuccessful(): 2710 raise PlanError( 2711 "Cannot generate plan due to failing test(s). Fix test(s) and run again." 2712 ) 2713 return result 2714 return None 2715 2716 @property 2717 def _model_tables(self) -> t.Dict[str, str]: 2718 """Mapping of model name to physical table name. 2719 2720 If a snapshot has not been versioned yet, its view name will be returned. 
2721 """ 2722 return { 2723 fqn: ( 2724 snapshot.table_name() 2725 if snapshot.version 2726 else snapshot.qualified_view_name.for_environment( 2727 EnvironmentNamingInfo.from_environment_catalog_mapping( 2728 self.environment_catalog_mapping, 2729 name=c.PROD, 2730 suffix_target=self.config.environment_suffix_target, 2731 ) 2732 ) 2733 ) 2734 for fqn, snapshot in self.snapshots.items() 2735 } 2736 2737 @cached_property 2738 def cache_dir(self) -> Path: 2739 if self.config.cache_dir: 2740 cache_path = Path(self.config.cache_dir) 2741 if cache_path.is_absolute(): 2742 return cache_path 2743 return self.path / cache_path 2744 2745 # Default to .cache directory in the project path 2746 return self.path / c.CACHE 2747 2748 @cached_property 2749 def engine_adapters(self) -> t.Dict[str, EngineAdapter]: 2750 """Returns all the engine adapters for the gateways defined in the configurations.""" 2751 adapters: t.Dict[str, EngineAdapter] = {self.selected_gateway: self.engine_adapter} 2752 for config in self.configs.values(): 2753 for gateway_name in config.gateways: 2754 if gateway_name not in adapters: 2755 connection = config.get_connection(gateway_name) 2756 adapter = connection.create_engine_adapter( 2757 concurrent_tasks=self.concurrent_tasks, 2758 ) 2759 adapters[gateway_name] = adapter 2760 return adapters 2761 2762 @cached_property 2763 def default_catalog_per_gateway(self) -> t.Dict[str, str]: 2764 """Returns the default catalogs for each engine adapter.""" 2765 return self._scheduler.get_default_catalog_per_gateway(self) 2766 2767 @property 2768 def concurrent_tasks(self) -> int: 2769 if self._concurrent_tasks is None: 2770 self._concurrent_tasks = self.connection_config.concurrent_tasks 2771 return self._concurrent_tasks 2772 2773 @cached_property 2774 def connection_config(self) -> ConnectionConfig: 2775 return self.config.get_connection(self.selected_gateway) 2776 2777 @cached_property 2778 def test_connection_config(self) -> ConnectionConfig: 2779 return 
self.config.get_test_connection( 2780 self.gateway, 2781 self.default_catalog, 2782 default_catalog_dialect=self.config.dialect, 2783 ) 2784 2785 @cached_property 2786 def environment_catalog_mapping(self) -> RegexKeyDict: 2787 engine_adapter = None 2788 try: 2789 engine_adapter = self.engine_adapter 2790 except Exception: 2791 pass 2792 2793 if ( 2794 self.config.environment_catalog_mapping 2795 and engine_adapter 2796 and not self.engine_adapter.catalog_support.is_multi_catalog_supported 2797 ): 2798 raise SQLMeshError( 2799 "Environment catalog mapping is only supported for engine adapters that support multiple catalogs" 2800 ) 2801 return self.config.environment_catalog_mapping 2802 2803 def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: 2804 if gateway: 2805 if adapter := self.engine_adapters.get(gateway): 2806 return adapter 2807 raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") 2808 return self.engine_adapter 2809 2810 def _snapshots( 2811 self, models_override: t.Optional[UniqueKeyDict[str, Model]] = None 2812 ) -> t.Dict[str, Snapshot]: 2813 nodes = {**(models_override or self._models), **self._standalone_audits} 2814 snapshots = self._nodes_to_snapshots(nodes) 2815 stored_snapshots = self.state_reader.get_snapshots(snapshots.values()) 2816 2817 unrestorable_snapshots = { 2818 snapshot 2819 for snapshot in stored_snapshots.values() 2820 if snapshot.name in nodes and snapshot.unrestorable 2821 } 2822 if unrestorable_snapshots: 2823 for snapshot in unrestorable_snapshots: 2824 logger.info( 2825 "Found a unrestorable snapshot %s. 
Restamping the model...", snapshot.name 2826 ) 2827 node = nodes[snapshot.name] 2828 nodes[snapshot.name] = node.copy( 2829 update={"stamp": f"revert to {snapshot.identifier}"} 2830 ) 2831 snapshots = self._nodes_to_snapshots(nodes) 2832 stored_snapshots = self.state_reader.get_snapshots(snapshots.values()) 2833 2834 for snapshot in stored_snapshots.values(): 2835 # Keep the original model instance to preserve the query cache. 2836 snapshot.node = snapshots[snapshot.name].node 2837 2838 return {name: stored_snapshots.get(s.snapshot_id, s) for name, s in snapshots.items()} 2839 2840 def _context_diff( 2841 self, 2842 environment: str, 2843 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 2844 create_from: t.Optional[str] = None, 2845 force_no_diff: bool = False, 2846 ensure_finalized_snapshots: bool = False, 2847 diff_rendered: bool = False, 2848 always_recreate_environment: bool = False, 2849 ) -> ContextDiff: 2850 environment = Environment.sanitize_name(environment) 2851 if force_no_diff: 2852 return ContextDiff.create_no_diff(environment, self.state_reader) 2853 2854 return ContextDiff.create( 2855 environment, 2856 snapshots=snapshots or self.snapshots, 2857 create_from=create_from or c.PROD, 2858 state_reader=self.state_reader, 2859 provided_requirements=self._requirements, 2860 excluded_requirements=self._excluded_requirements, 2861 ensure_finalized_snapshots=ensure_finalized_snapshots, 2862 diff_rendered=diff_rendered, 2863 environment_statements=self._environment_statements, 2864 gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, 2865 infer_python_dependencies=self.config.infer_python_dependencies, 2866 always_recreate_environment=always_recreate_environment, 2867 ) 2868 2869 def _destroy(self) -> bool: 2870 # Invalidate all environments, including prod 2871 for environment in self.state_reader.get_environments(): 2872 self.state_sync.invalidate_environment(name=environment.name, protect_prod=False) 2873 
self.console.log_success(f"Environment '{environment.name}' invalidated.") 2874 2875 # Run janitor to clean up all objects 2876 self._run_janitor(ignore_ttl=True) 2877 2878 # Remove state tables, including backup tables 2879 self.state_sync.remove_state(including_backup=True) 2880 self.console.log_status_update("State tables removed.") 2881 2882 # Finally clear caches 2883 self.clear_caches() 2884 2885 return True 2886 2887 def _run_janitor(self, ignore_ttl: bool = False) -> None: 2888 current_ts = now_timestamp() 2889 2890 # Clean up expired environments by removing their views and schemas 2891 self._cleanup_environments(current_ts=current_ts) 2892 2893 delete_expired_snapshots( 2894 self.state_sync, 2895 self.snapshot_evaluator, 2896 current_ts=current_ts, 2897 ignore_ttl=ignore_ttl, 2898 console=self.console, 2899 batch_size=self.config.janitor.expired_snapshots_batch_size, 2900 ) 2901 self.state_sync.compact_intervals() 2902 2903 def _cleanup_environments(self, current_ts: t.Optional[int] = None) -> None: 2904 current_ts = current_ts or now_timestamp() 2905 2906 expired_environments_summaries = self.state_sync.get_expired_environments( 2907 current_ts=current_ts 2908 ) 2909 2910 for expired_env_summary in expired_environments_summaries: 2911 expired_env = self.state_reader.get_environment(expired_env_summary.name) 2912 2913 if expired_env: 2914 cleanup_expired_views( 2915 default_adapter=self.engine_adapter, 2916 engine_adapters=self.engine_adapters, 2917 environments=[expired_env], 2918 warn_on_delete_failure=self.config.janitor.warn_on_delete_failure, 2919 console=self.console, 2920 ) 2921 2922 self.state_sync.delete_expired_environments(current_ts=current_ts) 2923 2924 def _try_connection(self, connection_name: str, validator: t.Callable[[], None]) -> None: 2925 connection_name = connection_name.capitalize() 2926 try: 2927 validator() 2928 self.console.log_status_update(f"{connection_name} connection [green]succeeded[/green]") 2929 except Exception as ex: 
2930 self.console.log_error(f"{connection_name} connection failed. {ex}") 2931 2932 def _new_state_sync(self) -> StateSync: 2933 return self._provided_state_sync or self._scheduler.create_state_sync(self) 2934 2935 def _new_selector( 2936 self, models: t.Optional[UniqueKeyDict[str, Model]] = None, dag: t.Optional[DAG[str]] = None 2937 ) -> Selector: 2938 return self._selector_cls( 2939 self.state_reader, 2940 models=models or self._models, 2941 context_path=self.path, 2942 dag=dag, 2943 default_catalog=self.default_catalog, 2944 dialect=self.default_dialect, 2945 cache_dir=self.cache_dir, 2946 ) 2947 2948 def _register_notification_targets(self) -> None: 2949 event_notifications = collections.defaultdict(set) 2950 for target in self.notification_targets: 2951 if target.is_configured: 2952 for event in target.notify_on: 2953 event_notifications[event].add(target) 2954 user_notification_targets = { 2955 user.username: set( 2956 target for target in user.notification_targets if target.is_configured 2957 ) 2958 for user in self.users 2959 } 2960 self.notification_target_manager = NotificationTargetManager( 2961 event_notifications, user_notification_targets, username=self.config.username 2962 ) 2963 2964 def _load_materializations(self) -> None: 2965 if not self._loaded: 2966 for loader in self._loaders: 2967 loader.load_materializations() 2968 2969 def _select_models_for_run( 2970 self, 2971 select_models: t.Collection[str], 2972 no_auto_upstream: bool, 2973 snapshots: t.Collection[Snapshot], 2974 ) -> t.Set[str]: 2975 models: UniqueKeyDict[str, Model] = UniqueKeyDict( 2976 "models", **{s.name: s.model for s in snapshots if s.is_model} 2977 ) 2978 dag: DAG[str] = DAG() 2979 for fqn, model in models.items(): 2980 dag.add(fqn, model.depends_on) 2981 model_selector = self._new_selector(models=models, dag=dag) 2982 result = set(model_selector.expand_model_selections(select_models)) 2983 if not no_auto_upstream: 2984 result = set(dag.subdag(*result)) 2985 return result 
2986 2987 @cached_property 2988 def _project_type(self) -> str: 2989 project_types = { 2990 c.DBT if loader.__class__.__name__.lower().startswith(c.DBT) else c.NATIVE 2991 for loader in self._loaders 2992 } 2993 return c.HYBRID if len(project_types) > 1 else first(project_types) 2994 2995 def _nodes_to_snapshots(self, nodes: t.Dict[str, Node]) -> t.Dict[str, Snapshot]: 2996 snapshots: t.Dict[str, Snapshot] = {} 2997 fingerprint_cache: t.Dict[str, SnapshotFingerprint] = {} 2998 2999 for node in nodes.values(): 3000 kwargs: t.Dict[str, t.Any] = {} 3001 if node.project in self._projects: 3002 config = self.config_for_node(node) 3003 kwargs["ttl"] = config.snapshot_ttl 3004 kwargs["table_naming_convention"] = config.physical_table_naming_convention 3005 3006 snapshot = Snapshot.from_node( 3007 node, 3008 nodes=nodes, 3009 cache=fingerprint_cache, 3010 **kwargs, 3011 ) 3012 snapshots[snapshot.name] = snapshot 3013 return snapshots 3014 3015 def _node_or_snapshot_to_fqn(self, node_or_snapshot: NodeOrSnapshot) -> str: 3016 if isinstance(node_or_snapshot, Snapshot): 3017 return node_or_snapshot.name 3018 if isinstance(node_or_snapshot, str) and not self.standalone_audits.get(node_or_snapshot): 3019 return normalize_model_name( 3020 node_or_snapshot, 3021 dialect=self.default_dialect, 3022 default_catalog=self.default_catalog, 3023 ) 3024 if not isinstance(node_or_snapshot, str): 3025 return node_or_snapshot.fqn 3026 return node_or_snapshot 3027 3028 @property 3029 def _plan_preview_enabled(self) -> bool: 3030 if self.config.plan.enable_preview is not None: 3031 return self.config.plan.enable_preview 3032 # It is dangerous to enable preview by default for dbt projects that rely on engines that don't support cloning. 3033 # Enabling previews in such cases can result in unintended full refreshes because dbt incremental models rely on 3034 # the maximum timestamp value in the target table. 
3035 return self._project_type == c.NATIVE or self.engine_adapter.SUPPORTS_CLONING 3036 3037 def _get_plan_default_start_end( 3038 self, 3039 snapshots: t.Dict[str, Snapshot], 3040 max_interval_end_per_model: t.Dict[str, datetime], 3041 backfill_models: t.Optional[t.Set[str]], 3042 modified_model_names: t.Set[str], 3043 execution_time: t.Optional[TimeLike] = None, 3044 ) -> t.Tuple[t.Optional[int], t.Optional[int]]: 3045 if not max_interval_end_per_model: 3046 return None, None 3047 3048 default_end = to_timestamp(max(max_interval_end_per_model.values())) 3049 default_start: t.Optional[int] = None 3050 # Infer the default start by finding the smallest interval start that corresponds to the default end. 3051 for model_name in backfill_models or modified_model_names or max_interval_end_per_model: 3052 if model_name not in snapshots: 3053 continue 3054 node = snapshots[model_name].node 3055 interval_unit = node.interval_unit 3056 default_start = min( 3057 default_start or sys.maxsize, 3058 to_timestamp( 3059 interval_unit.cron_prev( 3060 interval_unit.cron_floor( 3061 max_interval_end_per_model.get( 3062 model_name, node.cron_floor(default_end) 3063 ), 3064 ), 3065 estimate=True, 3066 ) 3067 ), 3068 ) 3069 3070 if execution_time and to_timestamp(default_end) > to_timestamp(execution_time): 3071 # the end date can't be in the future, which can happen if a specific `execution_time` is set and prod intervals 3072 # are newer than it 3073 default_end = to_timestamp(execution_time) 3074 3075 return default_start, default_end 3076 3077 def _calculate_start_override_per_model( 3078 self, 3079 min_intervals: t.Optional[int], 3080 plan_start: t.Optional[TimeLike], 3081 plan_end: t.Optional[TimeLike], 3082 plan_execution_time: TimeLike, 3083 backfill_model_fqns: t.Optional[t.Set[str]], 3084 snapshots_by_model_fqn: t.Dict[str, Snapshot], 3085 end_override_per_model: t.Optional[t.Dict[str, datetime]], 3086 ) -> t.Dict[str, datetime]: 3087 if not min_intervals or not 
backfill_model_fqns or not plan_start: 3088 # If there are no models to backfill, there are no intervals to consider for backfill, so we dont need to consider a minimum number 3089 # If the plan doesnt have a start date, all intervals are considered already so we dont need to consider a minimum number 3090 # If we dont have a minimum number of intervals to consider, then we dont need to adjust the start date on a per-model basis 3091 return {} 3092 3093 start_overrides: t.Dict[str, datetime] = {} 3094 end_override_per_model = end_override_per_model or {} 3095 3096 plan_execution_time_dt = to_datetime(plan_execution_time) 3097 plan_start_dt = to_datetime(plan_start, relative_base=plan_execution_time_dt) 3098 plan_end_dt = to_datetime( 3099 plan_end or plan_execution_time_dt, relative_base=plan_execution_time_dt 3100 ) 3101 3102 # we need to take the DAG into account so that parent models can be expanded to cover at least as much as their children 3103 # for example, A(hourly) <- B(daily) 3104 # if min_intervals=1, A would have 1 hour and B would have 1 day 3105 # but B depends on A so in order for B to have 1 valid day, A needs to be expanded to 24 hours 3106 backfill_dag: DAG[str] = DAG() 3107 for fqn in backfill_model_fqns: 3108 backfill_dag.add( 3109 fqn, 3110 [ 3111 p.name 3112 for p in snapshots_by_model_fqn[fqn].parents 3113 if p.name in backfill_model_fqns 3114 ], 3115 ) 3116 3117 # start from the leaf nodes and work back towards the root because the min_start at the root node is determined by the calculated starts in the leaf nodes 3118 reversed_dag = backfill_dag.reversed 3119 graph = reversed_dag.graph 3120 3121 for model_fqn in reversed_dag: 3122 # Get the earliest start from all immediate children of this snapshot 3123 # this works because topological ordering guarantees that they've already been visited 3124 # and we always set a start override 3125 min_child_start = min( 3126 [start_overrides[immediate_child_fqn] for immediate_child_fqn in 
graph[model_fqn]], 3127 default=plan_start_dt, 3128 ) 3129 3130 snapshot = snapshots_by_model_fqn.get(model_fqn) 3131 3132 if not snapshot: 3133 continue 3134 3135 starting_point = end_override_per_model.get(model_fqn, plan_end_dt) 3136 if node_end := snapshot.node.end: 3137 # if we dont do this, if the node end is a *date* (as opposed to a timestamp) 3138 # we end up incorrectly winding back an extra day 3139 node_end_dt = make_exclusive(node_end) 3140 3141 if node_end_dt < plan_end_dt: 3142 # if the model has an end date that has already elapsed, use that as a starting point for calculating min_intervals 3143 # instead of the plan end. If we use the plan end, we will return intervals in the future which are invalid 3144 starting_point = node_end_dt 3145 3146 snapshot_start = snapshot.node.cron_floor(starting_point) 3147 3148 for _ in range(min_intervals): 3149 # wind back the starting point by :min_intervals intervals to arrive at the minimum snapshot start date 3150 snapshot_start = snapshot.node.cron_prev(snapshot_start) 3151 3152 start_overrides[model_fqn] = min(min_child_start, snapshot_start) 3153 3154 return start_overrides 3155 3156 def _get_max_interval_end_per_model( 3157 self, snapshots: t.Dict[str, Snapshot], backfill_models: t.Optional[t.Set[str]] 3158 ) -> t.Dict[str, datetime]: 3159 models_for_interval_end = ( 3160 self._get_models_for_interval_end(snapshots, backfill_models) 3161 if backfill_models is not None 3162 else None 3163 ) 3164 return { 3165 model_fqn: to_datetime(ts) 3166 for model_fqn, ts in self.state_sync.max_interval_end_per_model( 3167 c.PROD, 3168 models=models_for_interval_end, 3169 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 3170 ).items() 3171 } 3172 3173 @staticmethod 3174 def _get_models_for_interval_end( 3175 snapshots: t.Dict[str, Snapshot], backfill_models: t.Set[str] 3176 ) -> t.Set[str]: 3177 models_for_interval_end = set() 3178 models_stack = list(backfill_models) 3179 while models_stack: 3180 
next_model = models_stack.pop() 3181 if next_model not in snapshots: 3182 continue 3183 models_for_interval_end.add(next_model) 3184 models_stack.extend( 3185 s.name 3186 for s in snapshots[next_model].parents 3187 if s.name not in models_for_interval_end 3188 ) 3189 return models_for_interval_end 3190 3191 def lint_models( 3192 self, 3193 models: t.Optional[t.Iterable[t.Union[str, Model]]] = None, 3194 raise_on_error: bool = True, 3195 ) -> t.List[AnnotatedRuleViolation]: 3196 found_error = False 3197 3198 model_list = ( 3199 list(self.get_model(model, raise_if_missing=True) for model in models) 3200 if models 3201 else self.models.values() 3202 ) 3203 all_violations = [] 3204 for model in model_list: 3205 # Linter may be `None` if the context is not loaded yet 3206 if linter := self._linters.get(model.project): 3207 lint_violation, violations = ( 3208 linter.lint_model(model, self, console=self.console) or found_error 3209 ) 3210 if lint_violation: 3211 found_error = True 3212 all_violations.extend(violations) 3213 3214 if raise_on_error and found_error: 3215 raise LinterError( 3216 "Linter detected errors in the code. Please fix them before proceeding." 
3217 ) 3218 3219 return all_violations 3220 3221 def select_tests( 3222 self, 3223 tests: t.Optional[t.List[str]] = None, 3224 patterns: t.Optional[t.List[str]] = None, 3225 ) -> t.List[ModelTestMetadata]: 3226 """Filter pre-loaded test metadata based on tests and patterns.""" 3227 3228 test_meta = self._model_test_metadata 3229 3230 if tests: 3231 filtered_tests = [] 3232 for test in tests: 3233 if "::" in test: 3234 if test in self._model_test_metadata_fully_qualified_name_index: 3235 filtered_tests.append( 3236 self._model_test_metadata_fully_qualified_name_index[test] 3237 ) 3238 else: 3239 test_path = Path(test) 3240 if test_path in self._model_test_metadata_path_index: 3241 filtered_tests.extend(self._model_test_metadata_path_index[test_path]) 3242 3243 test_meta = filtered_tests 3244 3245 if patterns: 3246 test_meta = filter_tests_by_patterns(test_meta, patterns) 3247 3248 return test_meta 3249 3250 3251class Context(GenericContext[Config]): 3252 CONFIG_TYPE = Config
class BaseContext(abc.ABC):
    """The base context which defines methods to execute a model.

    Subclasses provide the default dialect, the model-to-table mapping and an
    engine adapter; the remaining members are implemented on top of those.
    """

    @property
    @abc.abstractmethod
    def default_dialect(self) -> t.Optional[str]:
        """Returns the default dialect."""

    @property
    @abc.abstractmethod
    def _model_tables(self) -> t.Dict[str, str]:
        """Returns a mapping of model names to tables."""

    @property
    @abc.abstractmethod
    def engine_adapter(self) -> EngineAdapter:
        """Returns an engine adapter."""

    @property
    def spark(self) -> t.Optional[PySparkSession]:
        """Returns the spark session if it exists."""
        return self.engine_adapter.spark

    @property
    def snowpark(self) -> t.Optional[SnowparkSession]:
        """Returns the snowpark session if it exists."""
        return self.engine_adapter.snowpark

    @property
    def bigframe(self) -> t.Optional[BigframeSession]:
        """Returns the bigframe session if it exists."""
        return self.engine_adapter.bigframe

    @property
    def default_catalog(self) -> t.Optional[str]:
        """Returns the default catalog.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this property.
        """
        raise NotImplementedError

    def table(self, model_name: str) -> str:
        """Deprecated alias of `resolve_table`; logs a warning on every call.

        Args:
            model_name: The model name.

        Returns:
            The physical table name.
        """
        get_console().log_warning(
            "The SQLMesh context's `table` method is deprecated and will be removed "
            "in a future release. Please use the `resolve_table` method instead."
        )
        return self.resolve_table(model_name)

    def resolve_table(self, model_name: str) -> str:
        """Gets the physical table name for a given model.

        Args:
            model_name: The model name.

        Returns:
            The physical table name.

        Raises:
            SQLMeshError: If the normalized model name has no table mapping.
        """
        model_name = normalize_model_name(model_name, self.default_catalog, self.default_dialect)

        # Read the mapping a single time: `_model_tables` is a property, and an
        # implementation may rebuild the whole mapping on every access.
        model_tables = self._model_tables

        if model_name not in model_tables:
            model_name_list = "\n".join(model_tables)
            logger.debug(
                f"'{model_name}' not found in model to table mapping. Available model names: \n{model_name_list}"
            )
            raise SQLMeshError(
                f"Unable to find a table mapping for model '{model_name}'. Has it been spelled correctly?"
            )

        # We generate SQL for the default dialect because the table name may be used in a
        # fetchdf call and so the quotes need to be correct (eg. backticks for bigquery)
        return parse_one(model_tables[model_name]).sql(
            dialect=self.default_dialect, identify=True
        )

    def fetchdf(
        self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
    ) -> pd.DataFrame:
        """Fetches a dataframe given a sql string or sqlglot expression.

        Args:
            query: SQL string or sqlglot expression.
            quote_identifiers: Whether to quote all identifiers in the query.

        Returns:
            The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
        """
        return self.engine_adapter.fetchdf(query, quote_identifiers=quote_identifiers)

    def fetch_pyspark_df(
        self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
    ) -> PySparkDataFrame:
        """Fetches a PySpark dataframe given a sql string or sqlglot expression.

        Args:
            query: SQL string or sqlglot expression.
            quote_identifiers: Whether to quote all identifiers in the query.

        Returns:
            A PySpark dataframe.
        """
        return self.engine_adapter.fetch_pyspark_df(query, quote_identifiers=quote_identifiers)
The base context which defines methods to execute a model.
@property
@abc.abstractmethod
def default_dialect(self) -> t.Optional[str]:
    """Returns the default dialect.

    Abstract: every concrete context must implement this property. The
    annotated return type allows ``None``.
    """
Returns the default dialect.
@property
@abc.abstractmethod
def engine_adapter(self) -> EngineAdapter:
    """Returns an engine adapter.

    Abstract: every concrete context must implement this property.
    """
Returns an engine adapter.
@property
def spark(self) -> t.Optional[PySparkSession]:
    """Returns the spark session of the engine adapter if it exists."""
    adapter = self.engine_adapter
    return adapter.spark
Returns the spark session if it exists.
@property
def snowpark(self) -> t.Optional[SnowparkSession]:
    """Returns the snowpark session of the engine adapter if it exists."""
    adapter = self.engine_adapter
    return adapter.snowpark
Returns the snowpark session if it exists.
@property
def bigframe(self) -> t.Optional[BigframeSession]:
    """Returns the bigframe session of the engine adapter if it exists."""
    adapter = self.engine_adapter
    return adapter.bigframe
Returns the bigframe session if it exists.
def resolve_table(self, model_name: str) -> str:
    """Gets the physical table name for a given model.

    Args:
        model_name: The model name.

    Returns:
        The physical table name.

    Raises:
        SQLMeshError: If the normalized model name has no table mapping.
    """
    model_name = normalize_model_name(model_name, self.default_catalog, self.default_dialect)

    # Read the mapping a single time: `_model_tables` is a property, and an
    # implementation may rebuild the whole mapping on every access.
    model_tables = self._model_tables

    if model_name not in model_tables:
        model_name_list = "\n".join(model_tables)
        logger.debug(
            f"'{model_name}' not found in model to table mapping. Available model names: \n{model_name_list}"
        )
        raise SQLMeshError(
            f"Unable to find a table mapping for model '{model_name}'. Has it been spelled correctly?"
        )

    # We generate SQL for the default dialect because the table name may be used in a
    # fetchdf call and so the quotes need to be correct (eg. backticks for bigquery)
    return parse_one(model_tables[model_name]).sql(
        dialect=self.default_dialect, identify=True
    )
Gets the physical table name for a given model.
Arguments:
- model_name: The model name.
Returns:
The physical table name.
def fetchdf(
    self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
) -> pd.DataFrame:
    """Runs a query through the engine adapter and returns the result as a dataframe.

    Args:
        query: SQL string or sqlglot expression.
        quote_identifiers: Whether to quote all identifiers in the query.

    Returns:
        The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
    """
    adapter = self.engine_adapter
    return adapter.fetchdf(query, quote_identifiers=quote_identifiers)
Fetches a dataframe given a sql string or sqlglot expression.
Arguments:
- query: SQL string or sqlglot expression.
- quote_identifiers: Whether to quote all identifiers in the query.
Returns:
The default dataframe is Pandas, but for Spark a PySpark dataframe is returned.
def fetch_pyspark_df(
    self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
) -> PySparkDataFrame:
    """Runs a query through the engine adapter and returns a PySpark dataframe.

    Args:
        query: SQL string or sqlglot expression.
        quote_identifiers: Whether to quote all identifiers in the query.

    Returns:
        A PySpark dataframe.
    """
    adapter = self.engine_adapter
    return adapter.fetch_pyspark_df(query, quote_identifiers=quote_identifiers)
Fetches a PySpark dataframe given a sql string or sqlglot expression.
Arguments:
- query: SQL string or sqlglot expression.
- quote_identifiers: Whether to quote all identifiers in the query.
Returns:
A PySpark dataframe.
class ExecutionContext(BaseContext):
    """The minimal context needed to execute a model.

    Args:
        engine_adapter: The engine adapter to execute queries against.
        snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
        deployability_index: Determines snapshots that are deployable in the context of this evaluation.
        default_dialect: Value returned by the `default_dialect` property.
        default_catalog: Value returned by the `default_catalog` property.
        is_restatement: Value returned by the `is_restatement` property.
        parent_intervals: Value returned by the `parent_intervals` property.
        variables: Values served by `var`. Lookups lower-case the requested name, so keys are
            expected to be lower-case already.
        blueprint_variables: Values served by `blueprint_var`, with the same lower-case lookup.
    """

    def __init__(
        self,
        engine_adapter: EngineAdapter,
        snapshots: t.Dict[str, Snapshot],
        deployability_index: t.Optional[DeployabilityIndex] = None,
        default_dialect: t.Optional[str] = None,
        default_catalog: t.Optional[str] = None,
        is_restatement: t.Optional[bool] = None,
        parent_intervals: t.Optional[Intervals] = None,
        variables: t.Optional[t.Dict[str, t.Any]] = None,
        blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    ):
        self.snapshots = snapshots
        self.deployability_index = deployability_index
        self._engine_adapter = engine_adapter
        self._default_catalog = default_catalog
        self._default_dialect = default_dialect
        self._variables = variables or {}
        self._blueprint_variables = blueprint_variables or {}
        self._is_restatement = is_restatement
        self._parent_intervals = parent_intervals

    @property
    def default_dialect(self) -> t.Optional[str]:
        """Returns the default dialect supplied at construction."""
        return self._default_dialect

    @property
    def engine_adapter(self) -> EngineAdapter:
        """Returns an engine adapter."""
        return self._engine_adapter

    @cached_property
    def _model_tables(self) -> t.Dict[str, str]:
        """Returns a mapping of model names to tables."""
        return to_table_mapping(self.snapshots.values(), self.deployability_index)

    @property
    def default_catalog(self) -> t.Optional[str]:
        """Returns the default catalog supplied at construction."""
        return self._default_catalog

    @property
    def gateway(self) -> t.Optional[str]:
        """Returns the gateway name."""
        return self.var(c.GATEWAY)

    @property
    def is_restatement(self) -> t.Optional[bool]:
        """Returns the `is_restatement` value supplied at construction."""
        return self._is_restatement

    @property
    def parent_intervals(self) -> t.Optional[Intervals]:
        """Returns the `parent_intervals` value supplied at construction."""
        return self._parent_intervals

    def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
        """Returns a variable value."""
        return self._variables.get(var_name.lower(), default)

    def blueprint_var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
        """Returns a blueprint variable value."""
        return self._blueprint_variables.get(var_name.lower(), default)

    def with_variables(
        self,
        variables: t.Dict[str, t.Any],
        blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    ) -> ExecutionContext:
        """Returns a new ExecutionContext that uses the given variables.

        The new context keeps this context's adapter, snapshots, deployability index,
        dialect, catalog, restatement flag and parent intervals. Its variables and
        blueprint variables are exactly the ones passed in; they are not merged with
        the ones held by this context.

        Args:
            variables: Variables for the new context.
            blueprint_variables: Blueprint variables for the new context.
        """
        return ExecutionContext(
            self._engine_adapter,
            self.snapshots,
            self.deployability_index,
            self._default_dialect,
            self._default_catalog,
            self._is_restatement,
            # Forward the parent intervals too; leaving them out made the derived
            # context report `None` regardless of what this context was built with.
            parent_intervals=self._parent_intervals,
            variables=variables,
            blueprint_variables=blueprint_variables,
        )
The minimal context needed to execute a model.
Arguments:
- engine_adapter: The engine adapter to execute queries against.
- snapshots: All upstream snapshots (by model name) to use for expansion and mapping of physical locations.
- deployability_index: Determines snapshots that are deployable in the context of this evaluation.
def __init__(
    self,
    engine_adapter: EngineAdapter,
    snapshots: t.Dict[str, Snapshot],
    deployability_index: t.Optional[DeployabilityIndex] = None,
    default_dialect: t.Optional[str] = None,
    default_catalog: t.Optional[str] = None,
    is_restatement: t.Optional[bool] = None,
    parent_intervals: t.Optional[Intervals] = None,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
):
    """Stores the collaborators and settings of the execution context.

    `snapshots` and `deployability_index` become public attributes; every other
    argument is kept on a private attribute. Falsy `variables` and
    `blueprint_variables` are replaced by a fresh empty dict.
    """
    # Public state.
    self.snapshots = snapshots
    self.deployability_index = deployability_index

    # Private state backing the read-only properties.
    self._engine_adapter = engine_adapter
    self._default_dialect = default_dialect
    self._default_catalog = default_catalog
    self._is_restatement = is_restatement
    self._parent_intervals = parent_intervals

    # Variable lookups always operate on a dict, never on None.
    self._variables = variables if variables else {}
    self._blueprint_variables = blueprint_variables if blueprint_variables else {}
@property
def engine_adapter(self) -> EngineAdapter:
    """Returns the engine adapter held by this context."""
    adapter = self._engine_adapter
    return adapter
Returns an engine adapter.
@property
def gateway(self) -> t.Optional[str]:
    """Returns the gateway name, i.e. the value of the `c.GATEWAY` variable (None if unset)."""
    gateway_name = self.var(c.GATEWAY)
    return gateway_name
Returns the gateway name.
def var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
    """Returns a variable value.

    Args:
        var_name: The variable name; it is lowercased before the lookup.
        default: The value to return when the variable is not set.

    Returns:
        The stored value, or `default` if there is no entry under the lowercased name.
    """
    lookup_key = var_name.lower()
    return self._variables.get(lookup_key, default)
Returns a variable value.
def blueprint_var(self, var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
    """Returns a blueprint variable value.

    Args:
        var_name: The blueprint variable name; it is lowercased before the lookup.
        default: The value to return when the blueprint variable is not set.

    Returns:
        The stored value, or `default` if there is no entry under the lowercased name.
    """
    lookup_key = var_name.lower()
    return self._blueprint_variables.get(lookup_key, default)
Returns a blueprint variable value.
def with_variables(
    self,
    variables: t.Dict[str, t.Any],
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> ExecutionContext:
    """Returns a new ExecutionContext that uses the given variables.

    Every other setting (engine adapter, snapshots, deployability index, default
    dialect and catalog, restatement flag and parent intervals) is carried over
    from this context.

    Args:
        variables: The variables for the new context. They are passed through as-is
            and are NOT merged with this context's current variables.
        blueprint_variables: The blueprint variables for the new context. When
            omitted, the new context starts with no blueprint variables.

    Returns:
        A new ExecutionContext; this instance is left unchanged.
    """
    # Keyword arguments keep this call correct if the constructor's parameter order
    # changes, and make it obvious that nothing is dropped. `parent_intervals` was
    # previously not forwarded, so the copy silently lost it.
    return ExecutionContext(
        engine_adapter=self._engine_adapter,
        snapshots=self.snapshots,
        deployability_index=self.deployability_index,
        default_dialect=self._default_dialect,
        default_catalog=self._default_catalog,
        is_restatement=self._is_restatement,
        parent_intervals=self._parent_intervals,
        variables=variables,
        blueprint_variables=blueprint_variables,
    )
Returns a new ExecutionContext with additional variables.
Inherited Members
354class GenericContext(BaseContext, t.Generic[C]): 355 """Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks. 356 357 Args: 358 notification_targets: The notification target to use. Defaults to what is defined in config. 359 paths: The directories containing SQLMesh files. 360 config: A Config object or the name of a Config object in config.py. 361 connection: The name of the connection. If not specified the first connection as it appears 362 in configuration will be used. 363 test_connection: The name of the connection to use for tests. If not specified the first 364 connection as it appears in configuration will be used. 365 concurrent_tasks: The maximum number of tasks that can use the connection concurrently. 366 load: Whether or not to automatically load all models and macros (default True). 367 console: The rich instance used for printing out CLI command results. 368 users: A list of users to make known to SQLMesh. 369 """ 370 371 CONFIG_TYPE: t.Type[C] 372 """The type of config object to use (default: Config).""" 373 374 PLAN_BUILDER_TYPE = PlanBuilder 375 """The type of plan builder object to use (default: PlanBuilder).""" 376 377 def __init__( 378 self, 379 notification_targets: t.Optional[t.List[NotificationTarget]] = None, 380 state_sync: t.Optional[StateSync] = None, 381 paths: t.Union[str | Path, t.Iterable[str | Path]] = "", 382 config: t.Optional[t.Union[C, str, t.Dict[Path, C]]] = None, 383 gateway: t.Optional[str] = None, 384 concurrent_tasks: t.Optional[int] = None, 385 loader: t.Optional[t.Type[Loader]] = None, 386 load: bool = True, 387 users: t.Optional[t.List[User]] = None, 388 config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None, 389 selector: t.Optional[t.Type[Selector]] = None, 390 ): 391 self.configs = ( 392 config 393 if isinstance(config, dict) 394 else load_configs(config, self.CONFIG_TYPE, paths, **(config_loader_kwargs or {})) 395 ) 396 self._projects = {config.project for config in 
self.configs.values()} 397 self.dag: DAG[str] = DAG() 398 self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models") 399 self._audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("audits") 400 self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( 401 "standaloneaudits" 402 ) 403 self._model_test_metadata: t.List[ModelTestMetadata] = [] 404 self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {} 405 self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {} 406 self._models_with_tests: t.Set[str] = set() 407 408 self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") 409 self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") 410 self._jinja_macros = JinjaMacroRegistry() 411 self._requirements: t.Dict[str, str] = {} 412 self._environment_statements: t.List[EnvironmentStatements] = [] 413 self._excluded_requirements: t.Set[str] = set() 414 self._engine_adapter: t.Optional[EngineAdapter] = None 415 self._linters: t.Dict[str, Linter] = {} 416 self._loaded: bool = False 417 self._selector_cls = selector or NativeSelector 418 419 self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items()))) 420 421 self._all_dialects: t.Set[str] = {self.config.dialect or ""} 422 423 if self.config.disable_anonymized_analytics: 424 analytics.disable_analytics() 425 426 self.gateway = gateway 427 self._scheduler = self.config.get_scheduler(self.gateway) 428 self.environment_ttl = self.config.environment_ttl 429 self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments) 430 self.auto_categorize_changes = self.config.plan.auto_categorize_changes 431 self.selected_gateway = (gateway or self.config.default_gateway_name).lower() 432 433 gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults 434 if gw_model_defaults: 435 # Merge global model defaults with the selected gateway's, if it's 
overriden 436 global_defaults = self.config.model_defaults.model_dump(exclude_unset=True) 437 gateway_defaults = gw_model_defaults.model_dump(exclude_unset=True) 438 439 self.config.model_defaults = ModelDefaultsConfig( 440 **{**global_defaults, **gateway_defaults} 441 ) 442 443 # This allows overriding the default dialect's normalization strategy, so for example 444 # one can do `dialect="duckdb,normalization_strategy=lowercase"` and this will be 445 # applied to the DuckDB dialect globally 446 if "normalization_strategy" in str(self.config.dialect): 447 dialect = Dialect.get_or_raise(self.config.dialect) 448 type(dialect).NORMALIZATION_STRATEGY = dialect.normalization_strategy 449 450 self._loaders = [ 451 (loader or config.loader)(self, path, **config.loader_kwargs) 452 for path, config in self.configs.items() 453 ] 454 455 self._concurrent_tasks = concurrent_tasks 456 self._state_connection_config = ( 457 self.config.get_state_connection(self.gateway) or self.connection_config 458 ) 459 460 self._snapshot_evaluator: t.Optional[SnapshotEvaluator] = None 461 462 self.console = get_console() 463 setattr(self.console, "dialect", self.config.dialect) 464 465 self._provided_state_sync: t.Optional[StateSync] = state_sync 466 self._state_sync: t.Optional[StateSync] = None 467 468 # Should we dedupe notification_targets? If so how? 
469 self.notification_targets = (notification_targets or []) + self.config.notification_targets 470 self.users = (users or []) + self.config.users 471 self.users = list({user.username: user for user in self.users}.values()) 472 self._register_notification_targets() 473 474 if load: 475 self.load() 476 477 @property 478 def default_dialect(self) -> t.Optional[str]: 479 return self.config.dialect 480 481 @property 482 def engine_adapter(self) -> EngineAdapter: 483 """Returns the default engine adapter.""" 484 if self._engine_adapter is None: 485 self._engine_adapter = self.connection_config.create_engine_adapter() 486 return self._engine_adapter 487 488 @property 489 def snapshot_evaluator(self) -> SnapshotEvaluator: 490 if not self._snapshot_evaluator: 491 self._snapshot_evaluator = SnapshotEvaluator( 492 { 493 gateway: adapter.with_settings(execute_log_level=logging.INFO) 494 for gateway, adapter in self.engine_adapters.items() 495 }, 496 ddl_concurrent_tasks=self.concurrent_tasks, 497 selected_gateway=self.selected_gateway, 498 ) 499 return self._snapshot_evaluator 500 501 def execution_context( 502 self, 503 deployability_index: t.Optional[DeployabilityIndex] = None, 504 engine_adapter: t.Optional[EngineAdapter] = None, 505 snapshots: t.Optional[t.Dict[str, Snapshot]] = None, 506 ) -> ExecutionContext: 507 """Returns an execution context.""" 508 return ExecutionContext( 509 engine_adapter=engine_adapter or self.engine_adapter, 510 snapshots=snapshots or self.snapshots, 511 deployability_index=deployability_index, 512 default_dialect=self.default_dialect, 513 default_catalog=self.default_catalog, 514 ) 515 516 @python_api_analytics 517 def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model: 518 """Update or insert a model. 519 520 The context's models dictionary will be updated to include these changes. 521 522 Args: 523 model: Model name or instance to update. 524 kwargs: The kwargs to update the model with. 
525 526 Returns: 527 A new instance of the updated or inserted model. 528 """ 529 model = self.get_model(model, raise_if_missing=True) 530 if not model.enabled: 531 raise SQLMeshError(f"The disabled model '{model.name}' cannot be upserted") 532 path = model._path 533 534 model = model.copy(update=kwargs) 535 model._path = path 536 537 self.dag.add(model.fqn, model.depends_on) 538 539 self._models.update( 540 { 541 model.fqn: model, 542 # bust the fingerprint cache for all downstream models 543 **{fqn: self._models[fqn].copy() for fqn in self.dag.downstream(model.fqn)}, 544 } 545 ) 546 547 update_model_schemas( 548 self.dag, 549 models=self._models, 550 cache_dir=self.cache_dir, 551 ) 552 553 if model.dialect: 554 self._all_dialects.add(model.dialect) 555 556 model.validate_definition() 557 558 return model 559 560 def scheduler( 561 self, 562 environment: t.Optional[str] = None, 563 snapshot_evaluator: t.Optional[SnapshotEvaluator] = None, 564 ) -> Scheduler: 565 """Returns the built-in scheduler. 566 567 Args: 568 environment: The target environment to source model snapshots from, or None 569 if snapshots should be sourced from the currently loaded local state. 570 571 Returns: 572 The built-in scheduler instance. 573 """ 574 snapshots: t.Iterable[Snapshot] 575 if environment is not None: 576 stored_environment = self.state_sync.get_environment(environment) 577 if stored_environment is None: 578 raise ConfigError(f"Environment '{environment}' was not found.") 579 snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values() 580 else: 581 snapshots = self.snapshots.values() 582 583 if not snapshots: 584 raise ConfigError("No models were found") 585 586 return self.create_scheduler(snapshots, snapshot_evaluator or self.snapshot_evaluator) 587 588 def create_scheduler( 589 self, snapshots: t.Iterable[Snapshot], snapshot_evaluator: SnapshotEvaluator 590 ) -> Scheduler: 591 """Creates the built-in scheduler. 
592 593 Args: 594 snapshots: The snapshots to schedule. 595 596 Returns: 597 The built-in scheduler instance. 598 """ 599 return Scheduler( 600 snapshots, 601 snapshot_evaluator, 602 self.state_sync, 603 default_catalog=self.default_catalog, 604 max_workers=self.concurrent_tasks, 605 console=self.console, 606 notification_target_manager=self.notification_target_manager, 607 ) 608 609 @property 610 def state_sync(self) -> StateSync: 611 if not self._state_sync: 612 self._state_sync = self._new_state_sync() 613 614 if self._state_sync.get_versions(validate=False).schema_version == 0: 615 self.console.log_status_update("Initializing new project state...") 616 self._state_sync.migrate() 617 self._state_sync.get_versions() 618 self._state_sync = CachingStateSync(self._state_sync) # type: ignore 619 return self._state_sync 620 621 @property 622 def state_reader(self) -> StateReader: 623 return self.state_sync 624 625 def refresh(self) -> None: 626 """Refresh all models that have been updated.""" 627 if any(loader.reload_needed() for loader in self._loaders): 628 self.load() 629 630 def load(self, update_schemas: bool = True) -> GenericContext[C]: 631 """Load all files in the context's path.""" 632 load_start_ts = time.perf_counter() 633 634 loaded_projects = [loader.load() for loader in self._loaders] 635 636 self.dag = DAG() 637 self._standalone_audits.clear() 638 self._audits.clear() 639 self._macros.clear() 640 self._models.clear() 641 self._metrics.clear() 642 self._requirements.clear() 643 self._excluded_requirements.clear() 644 self._linters.clear() 645 self._environment_statements = [] 646 self._model_test_metadata.clear() 647 self._model_test_metadata_path_index.clear() 648 self._model_test_metadata_fully_qualified_name_index.clear() 649 self._models_with_tests.clear() 650 651 for loader, project in zip(self._loaders, loaded_projects): 652 self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) 653 self._macros.update(project.macros) 654 
self._models.update(project.models) 655 self._metrics.update(project.metrics) 656 self._audits.update(project.audits) 657 self._standalone_audits.update(project.standalone_audits) 658 self._requirements.update(project.requirements) 659 self._excluded_requirements.update(project.excluded_requirements) 660 self._environment_statements.extend(project.environment_statements) 661 662 self._model_test_metadata.extend(project.model_test_metadata) 663 for metadata in project.model_test_metadata: 664 if metadata.path not in self._model_test_metadata_path_index: 665 self._model_test_metadata_path_index[metadata.path] = [] 666 self._model_test_metadata_path_index[metadata.path].append(metadata) 667 self._model_test_metadata_fully_qualified_name_index[ 668 metadata.fully_qualified_test_name 669 ] = metadata 670 self._models_with_tests.add(metadata.model_name) 671 672 config = loader.config 673 self._linters[config.project] = Linter.from_rules( 674 BUILTIN_RULES.union(project.user_rules), config.linter 675 ) 676 677 # Load environment statements from state for projects not in current load 678 if any(self._projects): 679 prod = self.state_reader.get_environment(c.PROD) 680 if prod: 681 existing_statements = self.state_reader.get_environment_statements(c.PROD) 682 for stmt in existing_statements: 683 if stmt.project and stmt.project not in self._projects: 684 self._environment_statements.append(stmt) 685 686 uncached = set() 687 688 if any(self._projects): 689 prod = self.state_reader.get_environment(c.PROD) 690 691 if prod: 692 for snapshot in self.state_reader.get_snapshots(prod.snapshots).values(): 693 if snapshot.node.project in self._projects: 694 uncached.add(snapshot.name) 695 else: 696 store = self._standalone_audits if snapshot.is_audit else self._models 697 store[snapshot.name] = snapshot.node # type: ignore 698 699 for model in self._models.values(): 700 self.dag.add(model.fqn, model.depends_on) 701 702 if update_schemas: 703 for fqn in self.dag: 704 model = 
self._models.get(fqn) # type: ignore 705 706 if not model or fqn in uncached: 707 continue 708 709 # make a copy of remote models that depend on local models or in the downstream chain 710 # without this, a SELECT * FROM local will not propogate properly because the downstream 711 # model will get mutated (schema changes) but the object is the same as the remote cache 712 if any(dep in uncached for dep in model.depends_on): 713 uncached.add(fqn) 714 self._models.update({fqn: model.copy(update={"mapping_schema": {}})}) 715 continue 716 717 update_model_schemas( 718 self.dag, 719 models=self._models, 720 cache_dir=self.cache_dir, 721 ) 722 723 models = self.models.values() 724 for model in models: 725 # The model definition can be validated correctly only after the schema is set. 726 model.validate_definition() 727 728 duplicates = set(self._models) & set(self._standalone_audits) 729 if duplicates: 730 raise ConfigError( 731 f"Models and Standalone audits cannot have the same name: {duplicates}" 732 ) 733 734 self._all_dialects = {m.dialect for m in self._models.values() if m.dialect} | { 735 self.default_dialect or "" 736 } 737 738 analytics.collector.on_project_loaded( 739 project_type=self._project_type, 740 models_count=len(self._models), 741 audits_count=len(self._audits), 742 standalone_audits_count=len(self._standalone_audits), 743 macros_count=len(self._macros), 744 jinja_macros_count=len(self._jinja_macros.root_macros), 745 load_time_sec=time.perf_counter() - load_start_ts, 746 state_sync_fingerprint=self._scheduler.state_sync_fingerprint(self), 747 project_name=self.config.project, 748 ) 749 750 self._loaded = True 751 return self 752 753 @python_api_analytics 754 def run( 755 self, 756 environment: t.Optional[str] = None, 757 *, 758 start: t.Optional[TimeLike] = None, 759 end: t.Optional[TimeLike] = None, 760 execution_time: t.Optional[TimeLike] = None, 761 skip_janitor: bool = False, 762 ignore_cron: bool = False, 763 select_models: 
t.Optional[t.Collection[str]] = None, 764 exit_on_env_update: t.Optional[int] = None, 765 no_auto_upstream: bool = False, 766 ) -> CompletionStatus: 767 """Run the entire dag through the scheduler. 768 769 Args: 770 environment: The target environment to source model snapshots from and virtually update. Default: prod. 771 start: The start of the interval to render. 772 end: The end of the interval to render. 773 execution_time: The date/time time reference to use for execution time. Defaults to now. 774 skip_janitor: Whether to skip the janitor task. 775 ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals. 776 select_models: A list of model selection expressions to filter models that should run. Note that 777 upstream dependencies of selected models will also be evaluated. 778 exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update 779 to the target environment. 780 no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`. 781 782 Returns: 783 True if the run was successful, False otherwise. 
784 """ 785 environment = environment or self.config.default_target_environment 786 environment = Environment.sanitize_name(environment) 787 if not skip_janitor and environment.lower() == c.PROD: 788 self._run_janitor() 789 790 self.notification_target_manager.notify( 791 NotificationEvent.RUN_START, environment=environment 792 ) 793 analytics_run_id = analytics.collector.on_run_start( 794 engine_type=self.snapshot_evaluator.adapter.dialect, 795 state_sync_type=self.state_sync.state_type(), 796 ) 797 self._load_materializations() 798 799 env_check_attempts_num = max( 800 1, 801 self.config.run.environment_check_max_wait 802 // self.config.run.environment_check_interval, 803 ) 804 805 def _block_until_finalized() -> str: 806 for _ in range(env_check_attempts_num): 807 assert environment is not None # mypy 808 environment_state = self.state_sync.get_environment(environment) 809 if not environment_state: 810 raise SQLMeshError(f"Environment '{environment}' was not found.") 811 if environment_state.finalized_ts: 812 return environment_state.plan_id 813 self.console.log_warning( 814 f"Environment '{environment}' is being updated by plan '{environment_state.plan_id}'. " 815 f"Retrying in {self.config.run.environment_check_interval} seconds..." 816 ) 817 time.sleep(self.config.run.environment_check_interval) 818 raise SQLMeshError( 819 f"Exceeded the maximum wait time for environment '{environment}' to be ready. " 820 "This means that the environment either failed to update or the update is taking longer than expected. " 821 "See https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#run to adjust the timeout settings." 
822 ) 823 824 success = False 825 interrupted = False 826 done = False 827 while not done: 828 plan_id_at_start = _block_until_finalized() 829 830 def _has_environment_changed() -> bool: 831 assert environment is not None # mypy 832 current_environment_state = self.state_sync.get_environment(environment) 833 return ( 834 not current_environment_state 835 or current_environment_state.plan_id != plan_id_at_start 836 or not current_environment_state.finalized_ts 837 ) 838 839 try: 840 completion_status = self._run( 841 environment, 842 start=start, 843 end=end, 844 execution_time=execution_time, 845 ignore_cron=ignore_cron, 846 select_models=select_models, 847 circuit_breaker=_has_environment_changed, 848 no_auto_upstream=no_auto_upstream, 849 ) 850 done = True 851 except CircuitBreakerError: 852 self.console.log_warning( 853 f"Environment '{environment}' modified while running. Restarting the run..." 854 ) 855 if exit_on_env_update: 856 interrupted = True 857 done = True 858 except Exception as e: 859 self.notification_target_manager.notify( 860 NotificationEvent.RUN_FAILURE, traceback.format_exc() 861 ) 862 logger.info("Run failed.", exc_info=e) 863 analytics.collector.on_run_end( 864 run_id=analytics_run_id, succeeded=False, interrupted=False, error=e 865 ) 866 raise e 867 868 if completion_status.is_success or interrupted: 869 self.notification_target_manager.notify( 870 NotificationEvent.RUN_END, environment=environment 871 ) 872 self.console.log_success(f"Run finished for environment '{environment}'") 873 elif completion_status.is_failure: 874 self.notification_target_manager.notify( 875 NotificationEvent.RUN_FAILURE, "See console logs for details." 
876 ) 877 878 analytics.collector.on_run_end( 879 run_id=analytics_run_id, succeeded=success, interrupted=interrupted 880 ) 881 882 if interrupted and exit_on_env_update is not None: 883 sys.exit(exit_on_env_update) 884 885 return completion_status 886 887 @python_api_analytics 888 def run_janitor(self, ignore_ttl: bool) -> bool: 889 success = False 890 891 if self.console.start_cleanup(ignore_ttl): 892 try: 893 self._run_janitor(ignore_ttl) 894 success = True 895 finally: 896 self.console.stop_cleanup(success=success) 897 898 return success 899 900 @python_api_analytics 901 def destroy(self) -> bool: 902 success = False 903 904 # Collect resources to be deleted 905 environments = self.state_reader.get_environments() 906 schemas_to_delete = set() 907 tables_to_delete = set() 908 views_to_delete = set() 909 all_snapshot_infos = set() 910 911 # For each environment find schemas and tables 912 for environment in environments: 913 all_snapshot_infos.update(environment.snapshots) 914 snapshots = self.state_reader.get_snapshots(environment.snapshots).values() 915 for snapshot in snapshots: 916 if snapshot.is_model and not snapshot.is_symbolic: 917 # Get the appropriate adapter 918 if environment.gateway_managed and snapshot.model_gateway: 919 adapter = self.engine_adapters.get( 920 snapshot.model_gateway, self.engine_adapter 921 ) 922 else: 923 adapter = self.engine_adapter 924 925 if environment.suffix_target.is_schema or environment.suffix_target.is_catalog: 926 schema = snapshot.qualified_view_name.schema_for_environment( 927 environment.naming_info, dialect=adapter.dialect 928 ) 929 catalog = snapshot.qualified_view_name.catalog_for_environment( 930 environment.naming_info, dialect=adapter.dialect 931 ) 932 if catalog: 933 schemas_to_delete.add(f"{catalog}.{schema}") 934 else: 935 schemas_to_delete.add(schema) 936 937 if environment.suffix_target.is_table: 938 view_name = snapshot.qualified_view_name.for_environment( 939 environment.naming_info, 
dialect=adapter.dialect 940 ) 941 views_to_delete.add(view_name) 942 943 # Add snapshot tables 944 table_name = snapshot.table_name() 945 tables_to_delete.add(table_name) 946 947 if self.console.start_destroy(schemas_to_delete, views_to_delete, tables_to_delete): 948 try: 949 success = self._destroy() 950 finally: 951 self.console.stop_destroy(success=success) 952 953 return success 954 955 @t.overload 956 def get_model( 957 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: Literal[True] = True 958 ) -> Model: ... 959 960 @t.overload 961 def get_model( 962 self, 963 model_or_snapshot: ModelOrSnapshot, 964 raise_if_missing: Literal[False] = False, 965 ) -> t.Optional[Model]: ... 966 967 def get_model( 968 self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: bool = False 969 ) -> t.Optional[Model]: 970 """Returns a model with the given name or None if a model with such name doesn't exist. 971 972 Args: 973 model_or_snapshot: A model name, model, or snapshot. 974 raise_if_missing: Raises an error if a model is not found. 975 976 Returns: 977 The expected model. 978 """ 979 if isinstance(model_or_snapshot, Snapshot): 980 return model_or_snapshot.model 981 if not isinstance(model_or_snapshot, str): 982 return model_or_snapshot 983 984 try: 985 # We should try all dialects referenced in the project for cases when models use mixed dialects. 986 for dialect in self._all_dialects: 987 normalized_name = normalize_model_name( 988 model_or_snapshot, 989 dialect=dialect, 990 default_catalog=self.default_catalog, 991 ) 992 if normalized_name in self._models: 993 return self._models[normalized_name] 994 except: 995 pass 996 997 if raise_if_missing: 998 if model_or_snapshot.endswith((".sql", ".py")): 999 msg = "Resolving models by path is not supported, please pass in the model name instead." 
1000 else: 1001 msg = f"Cannot find model with name '{model_or_snapshot}'" 1002 1003 raise SQLMeshError(msg) 1004 1005 return None 1006 1007 @t.overload 1008 def get_snapshot(self, node_or_snapshot: NodeOrSnapshot) -> t.Optional[Snapshot]: ... 1009 1010 @t.overload 1011 def get_snapshot( 1012 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: Literal[True] 1013 ) -> Snapshot: ... 1014 1015 @t.overload 1016 def get_snapshot( 1017 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: Literal[False] 1018 ) -> t.Optional[Snapshot]: ... 1019 1020 def get_snapshot( 1021 self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: bool = False 1022 ) -> t.Optional[Snapshot]: 1023 """Returns a snapshot with the given name or None if a snapshot with such name doesn't exist. 1024 1025 Args: 1026 node_or_snapshot: A node name, node, or snapshot. 1027 raise_if_missing: Raises an error if a snapshot is not found. 1028 1029 Returns: 1030 The expected snapshot. 1031 """ 1032 if isinstance(node_or_snapshot, Snapshot): 1033 return node_or_snapshot 1034 fqn = self._node_or_snapshot_to_fqn(node_or_snapshot) 1035 snapshot = self.snapshots.get(fqn) 1036 1037 if raise_if_missing and not snapshot: 1038 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1039 1040 return snapshot 1041 1042 def config_for_path(self, path: Path) -> t.Tuple[Config, Path]: 1043 """Returns the config and path of the said project for a given file path.""" 1044 for config_path, config in self.configs.items(): 1045 try: 1046 path.relative_to(config_path) 1047 return config, config_path 1048 except ValueError: 1049 pass 1050 return self.config, self.path 1051 1052 def config_for_node(self, node: Model | Audit) -> Config: 1053 path = node._path 1054 if path is None: 1055 return self.config 1056 return self.config_for_path(path)[0] # type: ignore 1057 1058 @property 1059 def models(self) -> MappingProxyType[str, Model]: 1060 """Returns all registered models in this context.""" 1061 return 
MappingProxyType(self._models) 1062 1063 @property 1064 def metrics(self) -> MappingProxyType[str, Metric]: 1065 """Returns all registered metrics in this context.""" 1066 return MappingProxyType(self._metrics) 1067 1068 @property 1069 def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]: 1070 """Returns all registered standalone audits in this context.""" 1071 return MappingProxyType(self._standalone_audits) 1072 1073 @property 1074 def models_with_tests(self) -> t.Set[str]: 1075 """Returns all models with tests in this context.""" 1076 return self._models_with_tests 1077 1078 @property 1079 def snapshots(self) -> t.Dict[str, Snapshot]: 1080 """Generates and returns snapshots based on models registered in this context. 1081 1082 If one of the snapshots has been previously stored in the persisted state, the stored 1083 instance will be returned. 1084 """ 1085 return self._snapshots() 1086 1087 @property 1088 def requirements(self) -> t.Dict[str, str]: 1089 """Returns the Python dependencies of the project loaded in this context.""" 1090 return self._requirements.copy() 1091 1092 @cached_property 1093 def default_catalog(self) -> t.Optional[str]: 1094 return self.default_catalog_per_gateway.get(self.selected_gateway) 1095 1096 @python_api_analytics 1097 def render( 1098 self, 1099 model_or_snapshot: ModelOrSnapshot, 1100 *, 1101 start: t.Optional[TimeLike] = None, 1102 end: t.Optional[TimeLike] = None, 1103 execution_time: t.Optional[TimeLike] = None, 1104 expand: t.Union[bool, t.Iterable[str]] = False, 1105 **kwargs: t.Any, 1106 ) -> exp.Expression: 1107 """Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models. 1108 1109 Args: 1110 model_or_snapshot: The model, model name, or snapshot to render. 1111 start: The start of the interval to render. 1112 end: The end of the interval to render. 1113 execution_time: The date/time time reference to use for execution time. Defaults to now. 
1114 expand: Whether or not to use expand materialized models, defaults to False. 1115 If True, all referenced models are expanded as raw queries. 1116 If a list, only referenced models are expanded as raw queries. 1117 1118 Returns: 1119 The rendered expression. 1120 """ 1121 execution_time = execution_time or now() 1122 1123 model = self.get_model(model_or_snapshot, raise_if_missing=True) 1124 1125 if expand and not isinstance(expand, bool): 1126 expand = { 1127 normalize_model_name( 1128 x, default_catalog=self.default_catalog, dialect=self.default_dialect 1129 ) 1130 for x in expand 1131 } 1132 1133 expand = self.dag.upstream(model.fqn) if expand is True else expand or [] 1134 1135 if model.is_seed: 1136 import pandas as pd 1137 1138 df = next( 1139 model.render( 1140 context=self.execution_context( 1141 engine_adapter=self._get_engine_adapter(model.gateway) 1142 ), 1143 start=start, 1144 end=end, 1145 execution_time=execution_time, 1146 **kwargs, 1147 ) 1148 ) 1149 return next(pandas_to_sql(t.cast(pd.DataFrame, df), model.columns_to_types)) 1150 1151 snapshots = self.snapshots 1152 deployability_index = DeployabilityIndex.create(snapshots.values(), start=start) 1153 1154 return model.render_query_or_raise( 1155 start=start, 1156 end=end, 1157 execution_time=execution_time, 1158 snapshots=snapshots, 1159 expand=expand, 1160 deployability_index=deployability_index, 1161 engine_adapter=self._get_engine_adapter(model.gateway), 1162 **kwargs, 1163 ) 1164 1165 @python_api_analytics 1166 def evaluate( 1167 self, 1168 model_or_snapshot: ModelOrSnapshot, 1169 start: TimeLike, 1170 end: TimeLike, 1171 execution_time: TimeLike, 1172 limit: t.Optional[int] = None, 1173 **kwargs: t.Any, 1174 ) -> DF: 1175 """Evaluate a model or snapshot (running its query against a DB/Engine). 1176 1177 This method is used to test or iterate on models without side effects. 1178 1179 Args: 1180 model_or_snapshot: The model, model name, or snapshot to render. 
1181 start: The start of the interval to evaluate. 1182 end: The end of the interval to evaluate. 1183 execution_time: The date/time time reference to use for execution time. 1184 limit: A limit applied to the model. 1185 """ 1186 snapshots = self.snapshots 1187 fqn = self._node_or_snapshot_to_fqn(model_or_snapshot) 1188 if fqn not in snapshots: 1189 raise SQLMeshError(f"Cannot find snapshot for '{fqn}'") 1190 snapshot = snapshots[fqn] 1191 1192 # Expand all uncategorized parents since physical tables don't exist for them yet 1193 expand = [ 1194 parent 1195 for parent in self.dag.upstream(snapshot.model.fqn) 1196 if (parent_snapshot := snapshots.get(parent)) 1197 and parent_snapshot.is_model 1198 and parent_snapshot.model.is_sql 1199 and not parent_snapshot.categorized 1200 ] 1201 1202 df = self.snapshot_evaluator.evaluate_and_fetch( 1203 snapshot, 1204 start=start, 1205 end=end, 1206 execution_time=execution_time, 1207 snapshots=self.snapshots, 1208 limit=limit or c.DEFAULT_MAX_LIMIT, 1209 expand=expand, 1210 ) 1211 1212 if df is None: 1213 raise RuntimeError(f"Error evaluating {snapshot.name}") 1214 1215 return df 1216 1217 @python_api_analytics 1218 def format( 1219 self, 1220 transpile: t.Optional[str] = None, 1221 rewrite_casts: t.Optional[bool] = None, 1222 append_newline: t.Optional[bool] = None, 1223 *, 1224 check: t.Optional[bool] = None, 1225 paths: t.Optional[t.Tuple[t.Union[str, Path], ...]] = None, 1226 **kwargs: t.Any, 1227 ) -> bool: 1228 """Format all SQL models and audits.""" 1229 filtered_targets = [ 1230 target 1231 for target in chain(self._models.values(), self._audits.values()) 1232 if target._path is not None 1233 and target._path.suffix == ".sql" 1234 and (not paths or any(target._path.samefile(p) for p in paths)) 1235 ] 1236 unformatted_file_paths = [] 1237 1238 for target in filtered_targets: 1239 if ( 1240 target._path is None or target.formatting is False 1241 ): # introduced to satisfy type checker as still want to pull filter out as 
many targets as possible before loop 1242 continue 1243 1244 with open(target._path, "r+", encoding="utf-8") as file: 1245 before = file.read() 1246 1247 after = self._format( 1248 target, 1249 before, 1250 transpile=transpile, 1251 rewrite_casts=rewrite_casts, 1252 append_newline=append_newline, 1253 **kwargs, 1254 ) 1255 1256 if not check: 1257 file.seek(0) 1258 file.write(after) 1259 file.truncate() 1260 elif before != after: 1261 unformatted_file_paths.append(target._path) 1262 1263 if unformatted_file_paths: 1264 for path in unformatted_file_paths: 1265 self.console.log_status_update(f"{path} needs reformatting.") 1266 self.console.log_status_update( 1267 f"\n{len(unformatted_file_paths)} file(s) need reformatting." 1268 ) 1269 return False 1270 1271 return True 1272 1273 def _format( 1274 self, 1275 target: Model | Audit, 1276 before: str, 1277 *, 1278 transpile: t.Optional[str] = None, 1279 rewrite_casts: t.Optional[bool] = None, 1280 append_newline: t.Optional[bool] = None, 1281 **kwargs: t.Any, 1282 ) -> str: 1283 expressions = parse(before, default_dialect=self.config_for_node(target).dialect) 1284 if transpile and is_meta_expression(expressions[0]): 1285 for prop in expressions[0].expressions: 1286 if prop.name.lower() == "dialect": 1287 prop.replace( 1288 exp.Property( 1289 this="dialect", 1290 value=exp.Literal.string(transpile or target.dialect), 1291 ) 1292 ) 1293 1294 format_config = self.config_for_node(target).format 1295 after = format_model_expressions( 1296 expressions, 1297 transpile or target.dialect, 1298 rewrite_casts=( 1299 rewrite_casts if rewrite_casts is not None else not format_config.no_rewrite_casts 1300 ), 1301 **{**format_config.generator_options, **kwargs}, 1302 ) 1303 1304 if append_newline is None: 1305 append_newline = format_config.append_newline 1306 if append_newline: 1307 after += "\n" 1308 1309 return after 1310 1311 @python_api_analytics 1312 def plan( 1313 self, 1314 environment: t.Optional[str] = None, 1315 *, 1316 
start: t.Optional[TimeLike] = None, 1317 end: t.Optional[TimeLike] = None, 1318 execution_time: t.Optional[TimeLike] = None, 1319 create_from: t.Optional[str] = None, 1320 skip_tests: t.Optional[bool] = None, 1321 restate_models: t.Optional[t.Iterable[str]] = None, 1322 no_gaps: t.Optional[bool] = None, 1323 skip_backfill: t.Optional[bool] = None, 1324 empty_backfill: t.Optional[bool] = None, 1325 forward_only: t.Optional[bool] = None, 1326 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1327 allow_additive_models: t.Optional[t.Collection[str]] = None, 1328 no_prompts: t.Optional[bool] = None, 1329 auto_apply: t.Optional[bool] = None, 1330 no_auto_categorization: t.Optional[bool] = None, 1331 effective_from: t.Optional[TimeLike] = None, 1332 include_unmodified: t.Optional[bool] = None, 1333 select_models: t.Optional[t.Collection[str]] = None, 1334 backfill_models: t.Optional[t.Collection[str]] = None, 1335 categorizer_config: t.Optional[CategorizerConfig] = None, 1336 enable_preview: t.Optional[bool] = None, 1337 no_diff: t.Optional[bool] = None, 1338 run: t.Optional[bool] = None, 1339 diff_rendered: t.Optional[bool] = None, 1340 skip_linter: t.Optional[bool] = None, 1341 explain: t.Optional[bool] = None, 1342 ignore_cron: t.Optional[bool] = None, 1343 min_intervals: t.Optional[int] = None, 1344 ) -> Plan: 1345 """Interactively creates a plan. 1346 1347 This method compares the current context with the target environment. It then presents 1348 the differences and asks whether to backfill each modified model. 1349 1350 Args: 1351 environment: The environment to diff and plan against. 1352 start: The start date of the backfill if there is one. 1353 end: The end date of the backfill if there is one. 1354 execution_time: The date/time reference to use for execution time. Defaults to now. 1355 create_from: The environment to create the target environment from if it 1356 doesn't exist. If not specified, the "prod" environment will be used. 
1357 skip_tests: Unit tests are run by default so this will skip them if enabled 1358 restate_models: A list of either internal or external models, or tags, that need to be restated 1359 for the given plan interval. If the target environment is a production environment, 1360 ALL snapshots that depended on these upstream tables will have their intervals deleted 1361 (even ones not in this current environment). Only the snapshots in this environment will 1362 be backfilled whereas others need to be recovered on a future plan application. For development 1363 environments only snapshots that are part of this plan will be affected. 1364 no_gaps: Whether to ensure that new snapshots for models that are already a 1365 part of the target environment have no data gaps when compared against previous 1366 snapshots for same models. 1367 skip_backfill: Whether to skip the backfill step. Default: False. 1368 empty_backfill: Like skip_backfill, but also records processed intervals. 1369 forward_only: Whether the purpose of the plan is to make forward only changes. 1370 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1371 allow_additive_models: Models whose forward-only changes are allowed to be additive. 1372 no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that 1373 if this flag is set to true and there are uncategorized changes the plan creation will 1374 fail. Default: False. 1375 auto_apply: Whether to automatically apply the new plan after creation. Default: False. 1376 no_auto_categorization: Indicates whether to disable automatic categorization of model 1377 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1378 option determines the behavior. 1379 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1380 project config by default. 
1381 effective_from: The effective date from which to apply forward-only changes on production. 1382 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1383 select_models: A list of model selection strings to filter the models that should be included into this plan. 1384 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1385 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1386 no_diff: Hide text differences for changed models. 1387 run: Whether to run latest intervals as part of the plan application. 1388 diff_rendered: Whether the diff should compare raw vs rendered models 1389 skip_linter: Linter runs by default so this will skip it if enabled 1390 explain: Whether to explain the plan instead of applying it. 1391 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1392 on every model when checking for missing intervals 1393 1394 Returns: 1395 The populated Plan object. 
1396 """ 1397 plan_builder = self.plan_builder( 1398 environment, 1399 start=start, 1400 end=end, 1401 execution_time=execution_time, 1402 create_from=create_from, 1403 skip_tests=skip_tests, 1404 restate_models=restate_models, 1405 no_gaps=no_gaps, 1406 skip_backfill=skip_backfill, 1407 empty_backfill=empty_backfill, 1408 forward_only=forward_only, 1409 allow_destructive_models=allow_destructive_models, 1410 allow_additive_models=allow_additive_models, 1411 no_auto_categorization=no_auto_categorization, 1412 effective_from=effective_from, 1413 include_unmodified=include_unmodified, 1414 select_models=select_models, 1415 backfill_models=backfill_models, 1416 categorizer_config=categorizer_config, 1417 enable_preview=enable_preview, 1418 run=run, 1419 diff_rendered=diff_rendered, 1420 skip_linter=skip_linter, 1421 explain=explain, 1422 ignore_cron=ignore_cron, 1423 min_intervals=min_intervals, 1424 ) 1425 1426 plan = plan_builder.build() 1427 1428 if no_auto_categorization or plan.uncategorized: 1429 # Prompts are required if the auto categorization is disabled 1430 # or if there are any uncategorized snapshots in the plan 1431 no_prompts = False 1432 1433 if explain: 1434 auto_apply = True 1435 1436 self.console.plan( 1437 plan_builder, 1438 auto_apply if auto_apply is not None else self.config.plan.auto_apply, 1439 self.default_catalog, 1440 no_diff=no_diff if no_diff is not None else self.config.plan.no_diff, 1441 no_prompts=no_prompts if no_prompts is not None else self.config.plan.no_prompts, 1442 ) 1443 1444 return plan 1445 1446 @python_api_analytics 1447 def plan_builder( 1448 self, 1449 environment: t.Optional[str] = None, 1450 *, 1451 start: t.Optional[TimeLike] = None, 1452 end: t.Optional[TimeLike] = None, 1453 execution_time: t.Optional[TimeLike] = None, 1454 create_from: t.Optional[str] = None, 1455 skip_tests: t.Optional[bool] = None, 1456 restate_models: t.Optional[t.Iterable[str]] = None, 1457 no_gaps: t.Optional[bool] = None, 1458 skip_backfill: 
t.Optional[bool] = None, 1459 empty_backfill: t.Optional[bool] = None, 1460 forward_only: t.Optional[bool] = None, 1461 allow_destructive_models: t.Optional[t.Collection[str]] = None, 1462 allow_additive_models: t.Optional[t.Collection[str]] = None, 1463 no_auto_categorization: t.Optional[bool] = None, 1464 effective_from: t.Optional[TimeLike] = None, 1465 include_unmodified: t.Optional[bool] = None, 1466 select_models: t.Optional[t.Collection[str]] = None, 1467 backfill_models: t.Optional[t.Collection[str]] = None, 1468 categorizer_config: t.Optional[CategorizerConfig] = None, 1469 enable_preview: t.Optional[bool] = None, 1470 run: t.Optional[bool] = None, 1471 diff_rendered: t.Optional[bool] = None, 1472 skip_linter: t.Optional[bool] = None, 1473 explain: t.Optional[bool] = None, 1474 ignore_cron: t.Optional[bool] = None, 1475 min_intervals: t.Optional[int] = None, 1476 always_include_local_changes: t.Optional[bool] = None, 1477 ) -> PlanBuilder: 1478 """Creates a plan builder. 1479 1480 Args: 1481 environment: The environment to diff and plan against. 1482 start: The start date of the backfill if there is one. 1483 end: The end date of the backfill if there is one. 1484 execution_time: The date/time reference to use for execution time. Defaults to now. 1485 create_from: The environment to create the target environment from if it 1486 doesn't exist. If not specified, the "prod" environment will be used. 1487 skip_tests: Unit tests are run by default so this will skip them if enabled 1488 restate_models: A list of either internal or external models, or tags, that need to be restated 1489 for the given plan interval. If the target environment is a production environment, 1490 ALL snapshots that depended on these upstream tables will have their intervals deleted 1491 (even ones not in this current environment). Only the snapshots in this environment will 1492 be backfilled whereas others need to be recovered on a future plan application. 
For development 1493 environments only snapshots that are part of this plan will be affected. 1494 no_gaps: Whether to ensure that new snapshots for models that are already a 1495 part of the target environment have no data gaps when compared against previous 1496 snapshots for same models. 1497 skip_backfill: Whether to skip the backfill step. Default: False. 1498 empty_backfill: Like skip_backfill, but also records processed intervals. 1499 forward_only: Whether the purpose of the plan is to make forward only changes. 1500 allow_destructive_models: Models whose forward-only changes are allowed to be destructive. 1501 no_auto_categorization: Indicates whether to disable automatic categorization of model 1502 changes (breaking / non-breaking). If not provided, then the corresponding configuration 1503 option determines the behavior. 1504 categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the 1505 project config by default. 1506 effective_from: The effective date from which to apply forward-only changes on production. 1507 include_unmodified: Indicates whether to include unmodified models in the target development environment. 1508 select_models: A list of model selection strings to filter the models that should be included into this plan. 1509 backfill_models: A list of model selection strings to filter the models for which the data should be backfilled. 1510 enable_preview: Indicates whether to enable preview for forward-only models in development environments. 1511 run: Whether to run latest intervals as part of the plan application. 1512 diff_rendered: Whether the diff should compare raw vs rendered models 1513 min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered 1514 on every model when checking for missing intervals 1515 always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. 
1516 However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour. 1517 1518 Returns: 1519 The plan builder. 1520 """ 1521 kwargs: t.Dict[str, t.Optional[UserProvidedFlags]] = { 1522 "start": start, 1523 "end": end, 1524 "execution_time": execution_time, 1525 "create_from": create_from, 1526 "skip_tests": skip_tests, 1527 "restate_models": list(restate_models) if restate_models is not None else None, 1528 "no_gaps": no_gaps, 1529 "skip_backfill": skip_backfill, 1530 "empty_backfill": empty_backfill, 1531 "forward_only": forward_only, 1532 "allow_destructive_models": list(allow_destructive_models) 1533 if allow_destructive_models is not None 1534 else None, 1535 "allow_additive_models": list(allow_additive_models) 1536 if allow_additive_models is not None 1537 else None, 1538 "no_auto_categorization": no_auto_categorization, 1539 "effective_from": effective_from, 1540 "include_unmodified": include_unmodified, 1541 "select_models": list(select_models) if select_models is not None else None, 1542 "backfill_models": list(backfill_models) if backfill_models is not None else None, 1543 "enable_preview": enable_preview, 1544 "run": run, 1545 "diff_rendered": diff_rendered, 1546 "skip_linter": skip_linter, 1547 "min_intervals": min_intervals, 1548 } 1549 user_provided_flags: t.Dict[str, UserProvidedFlags] = { 1550 k: v for k, v in kwargs.items() if v is not None 1551 } 1552 1553 skip_tests = explain or skip_tests or False 1554 no_gaps = no_gaps or False 1555 skip_backfill = skip_backfill or False 1556 empty_backfill = empty_backfill or False 1557 run = run or False 1558 diff_rendered = diff_rendered or False 1559 skip_linter = skip_linter or False 1560 1561 environment = environment or self.config.default_target_environment 1562 environment = Environment.sanitize_name(environment) 1563 is_dev = environment != c.PROD 1564 1565 if include_unmodified is None: 1566 include_unmodified = 
self.config.plan.include_unmodified 1567 1568 if skip_backfill and not no_gaps and not is_dev: 1569 # note: we deliberately don't mention the --no-gaps flag in case the plan came from the sqlmesh_dbt command 1570 # todo: perhaps we could have better error messages if we check sys.argv[0] for which cli is running? 1571 self.console.log_warning( 1572 "Skipping the backfill stage for production can lead to unexpected results, such as tables being empty or incremental data with non-contiguous time ranges being made available.\n" 1573 "If you are doing this deliberately to create an empty version of a table to test a change, please consider using Virtual Data Environments instead." 1574 ) 1575 1576 if not skip_linter: 1577 self.lint_models() 1578 1579 self._run_plan_tests(skip_tests=skip_tests) 1580 1581 environment_ttl = ( 1582 self.environment_ttl if environment not in self.pinned_environments else None 1583 ) 1584 1585 model_selector = self._new_selector() 1586 1587 if allow_destructive_models: 1588 expanded_destructive_models = model_selector.expand_model_selections( 1589 allow_destructive_models 1590 ) 1591 else: 1592 expanded_destructive_models = None 1593 1594 if allow_additive_models: 1595 expanded_additive_models = model_selector.expand_model_selections(allow_additive_models) 1596 else: 1597 expanded_additive_models = None 1598 1599 if backfill_models: 1600 backfill_models = model_selector.expand_model_selections(backfill_models) 1601 else: 1602 backfill_models = None 1603 1604 models_override: t.Optional[UniqueKeyDict[str, Model]] = None 1605 if select_models: 1606 try: 1607 models_override = model_selector.select_models( 1608 select_models, 1609 environment, 1610 fallback_env_name=create_from or c.PROD, 1611 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1612 ) 1613 except SQLMeshError as e: 1614 logger.exception(e) # ensure the full stack trace is logged 1615 raise PlanError( 1616 f"{e}\nCheck the SQLMesh log file for the full stack 
trace.\nIf the model has been fixed locally, please ensure that the --select-model expression includes it." 1617 ) 1618 if not backfill_models: 1619 # Only backfill selected models unless explicitly specified. 1620 backfill_models = model_selector.expand_model_selections(select_models) 1621 1622 expanded_restate_models = None 1623 if restate_models is not None: 1624 expanded_restate_models = model_selector.expand_model_selections(restate_models) 1625 1626 if (restate_models is not None and not expanded_restate_models) or ( 1627 backfill_models is not None and not backfill_models 1628 ): 1629 raise PlanError( 1630 "Selector did not return any models. Please check your model selection and try again." 1631 ) 1632 1633 if always_include_local_changes is None: 1634 # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes 1635 force_no_diff = restate_models is not None or ( 1636 backfill_models is not None and not backfill_models 1637 ) 1638 else: 1639 force_no_diff = not always_include_local_changes 1640 1641 snapshots = self._snapshots(models_override) 1642 context_diff = self._context_diff( 1643 environment or c.PROD, 1644 snapshots=snapshots, 1645 create_from=create_from, 1646 force_no_diff=force_no_diff, 1647 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1648 diff_rendered=diff_rendered, 1649 always_recreate_environment=self.config.plan.always_recreate_environment, 1650 ) 1651 modified_model_names = { 1652 *context_diff.modified_snapshots, 1653 *[s.name for s in context_diff.added], 1654 } 1655 1656 if ( 1657 is_dev 1658 and not include_unmodified 1659 and backfill_models is None 1660 and expanded_restate_models is None 1661 ): 1662 # Only backfill modified and added models. 1663 # This ensures that no models outside the impacted sub-DAG(s) will be backfilled unexpectedly. 
1664 backfill_models = modified_model_names or None 1665 1666 max_interval_end_per_model = None 1667 default_start, default_end = None, None 1668 if not run: 1669 ignore_cron = False 1670 max_interval_end_per_model = self._get_max_interval_end_per_model( 1671 snapshots, backfill_models 1672 ) 1673 # If no end date is specified, use the max interval end from prod 1674 # to prevent unintended evaluation of the entire DAG. 1675 default_start, default_end = self._get_plan_default_start_end( 1676 snapshots, 1677 max_interval_end_per_model, 1678 backfill_models, 1679 modified_model_names, 1680 execution_time or now(), 1681 ) 1682 1683 # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model. 1684 self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values()) 1685 1686 start_override_per_model = self._calculate_start_override_per_model( 1687 min_intervals, 1688 start or default_start, 1689 end or default_end, 1690 execution_time or now(), 1691 backfill_models, 1692 snapshots, 1693 max_interval_end_per_model, 1694 ) 1695 1696 if not self.config.virtual_environment_mode.is_full: 1697 forward_only = True 1698 elif forward_only is None: 1699 forward_only = self.config.plan.forward_only 1700 1701 # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments 1702 # If we are not, then there is no point, because none of the data in dev environments can be promoted by definition 1703 restate_all_snapshots = ( 1704 expanded_restate_models is not None 1705 and not is_dev 1706 and self.config.virtual_environment_mode.is_full 1707 ) 1708 1709 return self.PLAN_BUILDER_TYPE( 1710 context_diff=context_diff, 1711 start=start, 1712 end=end, 1713 execution_time=execution_time, 1714 apply=self.apply, 1715 restate_models=expanded_restate_models, 1716 restate_all_snapshots=restate_all_snapshots, 1717 backfill_models=backfill_models, 1718 no_gaps=no_gaps, 
1719 skip_backfill=skip_backfill, 1720 empty_backfill=empty_backfill, 1721 is_dev=is_dev, 1722 forward_only=forward_only, 1723 allow_destructive_models=expanded_destructive_models, 1724 allow_additive_models=expanded_additive_models, 1725 environment_ttl=environment_ttl, 1726 environment_suffix_target=self.config.environment_suffix_target, 1727 environment_catalog_mapping=self.environment_catalog_mapping, 1728 categorizer_config=categorizer_config or self.auto_categorize_changes, 1729 auto_categorization_enabled=not no_auto_categorization, 1730 effective_from=effective_from, 1731 include_unmodified=include_unmodified, 1732 default_start=default_start, 1733 default_end=default_end, 1734 enable_preview=( 1735 enable_preview if enable_preview is not None else self._plan_preview_enabled 1736 ), 1737 end_bounded=not run, 1738 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 1739 start_override_per_model=start_override_per_model, 1740 end_override_per_model=max_interval_end_per_model, 1741 console=self.console, 1742 user_provided_flags=user_provided_flags, 1743 selected_models={ 1744 dbt_unique_id 1745 for model in model_selector.expand_model_selections(select_models or "*") 1746 if (dbt_unique_id := snapshots[model].node.dbt_unique_id) 1747 }, 1748 explain=explain or False, 1749 ignore_cron=ignore_cron or False, 1750 ) 1751 1752 def apply( 1753 self, 1754 plan: Plan, 1755 circuit_breaker: t.Optional[t.Callable[[], bool]] = None, 1756 ) -> None: 1757 """Applies a plan by pushing snapshots and backfilling data. 1758 1759 Given a plan, it pushes snapshots into the state sync and then uses the scheduler 1760 to backfill all models. 1761 1762 Args: 1763 plan: The plan to apply. 1764 circuit_breaker: An optional handler which checks if the apply should be aborted. 
1765 """ 1766 if ( 1767 not plan.context_diff.has_changes 1768 and not plan.requires_backfill 1769 and not plan.has_unmodified_unpromoted 1770 ): 1771 return 1772 if plan.uncategorized: 1773 raise UncategorizedPlanError("Can't apply a plan with uncategorized changes.") 1774 1775 if plan.explain: 1776 explainer = PlanExplainer( 1777 state_reader=self.state_reader, 1778 default_catalog=self.default_catalog, 1779 console=self.console, 1780 ) 1781 explainer.evaluate(plan.to_evaluatable()) 1782 return 1783 1784 self.notification_target_manager.notify( 1785 NotificationEvent.APPLY_START, 1786 environment=plan.environment_naming_info.name, 1787 plan_id=plan.plan_id, 1788 ) 1789 try: 1790 self._apply(plan, circuit_breaker) 1791 except Exception as e: 1792 self.notification_target_manager.notify( 1793 NotificationEvent.APPLY_FAILURE, 1794 environment=plan.environment_naming_info.name, 1795 plan_id=plan.plan_id, 1796 exc=traceback.format_exc(), 1797 ) 1798 logger.info("Plan application failed.", exc_info=e) 1799 raise e 1800 self.notification_target_manager.notify( 1801 NotificationEvent.APPLY_END, 1802 environment=plan.environment_naming_info.name, 1803 plan_id=plan.plan_id, 1804 ) 1805 1806 @python_api_analytics 1807 def invalidate_environment(self, name: str, sync: bool = False) -> None: 1808 """Invalidates the target environment by setting its expiration timestamp to now. 1809 1810 Args: 1811 name: The name of the environment to invalidate. 1812 sync: If True, the call blocks until the environment is deleted. Otherwise, the environment will 1813 be deleted asynchronously by the janitor process. 
1814 """ 1815 name = Environment.sanitize_name(name) 1816 self.state_sync.invalidate_environment(name) 1817 if sync: 1818 self._cleanup_environments() 1819 self.console.log_success(f"Environment '{name}' deleted.") 1820 else: 1821 self.console.log_success(f"Environment '{name}' invalidated.") 1822 1823 @python_api_analytics 1824 def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> bool: 1825 """Show a diff of the current context with a given environment. 1826 1827 Args: 1828 environment: The environment to diff against. 1829 detailed: Show the actual SQL differences if True. 1830 1831 Returns: 1832 True if there are changes, False otherwise. 1833 """ 1834 environment = environment or self.config.default_target_environment 1835 environment = Environment.sanitize_name(environment) 1836 context_diff = self._context_diff(environment) 1837 self.console.show_environment_difference_summary( 1838 context_diff, 1839 no_diff=not detailed, 1840 ) 1841 if context_diff.has_changes: 1842 self.console.show_model_difference_summary( 1843 context_diff, 1844 EnvironmentNamingInfo.from_environment_catalog_mapping( 1845 self.environment_catalog_mapping, 1846 name=environment, 1847 suffix_target=self.config.environment_suffix_target, 1848 normalize_name=context_diff.normalize_environment_name, 1849 ), 1850 self.default_catalog, 1851 no_diff=not detailed, 1852 ) 1853 return context_diff.has_changes 1854 1855 @python_api_analytics 1856 def table_diff( 1857 self, 1858 source: str, 1859 target: str, 1860 on: t.Optional[t.List[str] | exp.Condition] = None, 1861 skip_columns: t.Optional[t.List[str]] = None, 1862 select_models: t.Optional[t.Collection[str]] = None, 1863 where: t.Optional[str | exp.Condition] = None, 1864 limit: int = 20, 1865 show: bool = True, 1866 show_sample: bool = True, 1867 decimals: int = 3, 1868 skip_grain_check: bool = False, 1869 warn_grain_check: bool = False, 1870 temp_schema: t.Optional[str] = None, 1871 schema_diff_ignore_case: bool = 
False, 1872 **kwargs: t.Any, # catch-all to prevent an 'unexpected keyword argument' error if an table_diff extension passes in some extra arguments 1873 ) -> t.List[TableDiff]: 1874 """Show a diff between two tables. 1875 1876 Args: 1877 source: The source environment or table. 1878 target: The target environment or table. 1879 on: The join condition, table aliases must be "s" and "t" for source and target. 1880 If omitted, the table's grain will be used. 1881 skip_columns: The columns to skip when computing the table diff. 1882 select_models: The models or snapshots to use when environments are passed in. 1883 where: An optional where statement to filter results. 1884 limit: The limit of the sample dataframe. 1885 show: Show the table diff output in the console. 1886 show_sample: Show the sample dataframe in the console. Requires show=True. 1887 decimals: The number of decimal places to keep when comparing floating point columns. 1888 skip_grain_check: Skip check for rows that contain null or duplicate grains. 1889 temp_schema: The schema to use for temporary tables. 1890 1891 Returns: 1892 The list of TableDiff objects containing schema and summary differences. 1893 """ 1894 1895 if "|" in source or "|" in target: 1896 raise ConfigError( 1897 "Cross-database table diffing is available in Tobiko Cloud. 
Read more here: " 1898 "https://sqlmesh.readthedocs.io/en/stable/guides/tablediff/#diffing-tables-or-views-across-gateways" 1899 ) 1900 1901 table_diffs: t.List[TableDiff] = [] 1902 1903 # Diffs multiple or a single model across two environments 1904 if select_models: 1905 source_env = self.state_reader.get_environment(source) 1906 target_env = self.state_reader.get_environment(target) 1907 if not source_env: 1908 raise SQLMeshError(f"Could not find environment '{source}'") 1909 if not target_env: 1910 raise SQLMeshError(f"Could not find environment '{target}'") 1911 criteria = ", ".join(f"'{c}'" for c in select_models) 1912 try: 1913 selected_models = self._new_selector().expand_model_selections(select_models) 1914 if not selected_models: 1915 self.console.log_status_update( 1916 f"No models matched the selection criteria: {criteria}" 1917 ) 1918 except Exception as e: 1919 raise SQLMeshError(e) 1920 1921 models_to_diff: t.List[ 1922 t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Condition]] 1923 ] = [] 1924 models_without_grain: t.List[Model] = [] 1925 source_snapshots_to_name = { 1926 snapshot.name: snapshot for snapshot in source_env.snapshots 1927 } 1928 target_snapshots_to_name = { 1929 snapshot.name: snapshot for snapshot in target_env.snapshots 1930 } 1931 1932 for model_fqn in selected_models: 1933 model = self._models[model_fqn] 1934 adapter = self._get_engine_adapter(model.gateway) 1935 source_snapshot = source_snapshots_to_name.get(model.fqn) 1936 target_snapshot = target_snapshots_to_name.get(model.fqn) 1937 1938 if target_snapshot and source_snapshot: 1939 if (source_snapshot.fingerprint != target_snapshot.fingerprint) and ( 1940 (source_snapshot.version != target_snapshot.version) 1941 or source_snapshot.is_forward_only 1942 ): 1943 # Compare the virtual layer instead of the physical layer because the virtual layer is guaranteed to point 1944 # to the correct/active snapshot for the model in the specified environment, taking 
into account things like dev previews 1945 source = source_snapshot.qualified_view_name.for_environment( 1946 source_env.naming_info, adapter.dialect 1947 ) 1948 target = target_snapshot.qualified_view_name.for_environment( 1949 target_env.naming_info, adapter.dialect 1950 ) 1951 model_on = on or model.on 1952 if not model_on: 1953 models_without_grain.append(model) 1954 else: 1955 models_to_diff.append((model, adapter, source, target, model_on)) 1956 1957 if models_without_grain: 1958 model_names = "\n".join( 1959 f"─ {model.name} \n at '{model._path}'" for model in models_without_grain 1960 ) 1961 message = ( 1962 "SQLMesh doesn't know how to join the tables for the following models:\n" 1963 f"{model_names}\n\n" 1964 "Please specify a `grain` in each model definition. It must be unique and not null." 1965 ) 1966 if warn_grain_check: 1967 self.console.log_warning(message) 1968 else: 1969 raise SQLMeshError(message) 1970 1971 if models_to_diff: 1972 self.console.show_table_diff_details( 1973 [model[0].name for model in models_to_diff], 1974 ) 1975 1976 self.console.start_table_diff_progress(len(models_to_diff)) 1977 try: 1978 tasks_num = min(len(models_to_diff), self.concurrent_tasks) 1979 table_diffs = concurrent_apply_to_values( 1980 list(models_to_diff), 1981 lambda model_info: self._model_diff( 1982 model=model_info[0], 1983 adapter=model_info[1], 1984 source=model_info[2], 1985 target=model_info[3], 1986 on=model_info[4], 1987 source_alias=source_env.name, 1988 target_alias=target_env.name, 1989 limit=limit, 1990 decimals=decimals, 1991 skip_columns=skip_columns, 1992 where=where, 1993 show=show, 1994 temp_schema=temp_schema, 1995 skip_grain_check=skip_grain_check, 1996 schema_diff_ignore_case=schema_diff_ignore_case, 1997 ), 1998 tasks_num=tasks_num, 1999 ) 2000 self.console.stop_table_diff_progress(success=True) 2001 except: 2002 self.console.stop_table_diff_progress(success=False) 2003 raise 2004 elif selected_models: 2005 self.console.log_status_update( 
def _table_diff(
    self,
    source: str,
    target: str,
    source_alias: str,
    target_alias: str,
    limit: int,
    decimals: int,
    adapter: EngineAdapter,
    on: t.Optional[t.List[str] | exp.Condition] = None,
    model: t.Optional[Model] = None,
    skip_columns: t.Optional[t.List[str]] = None,
    where: t.Optional[str | exp.Condition] = None,
    schema_diff_ignore_case: bool = False,
) -> TableDiff:
    """Build the `TableDiff` that compares `source` with `target`.

    Args:
        source: Name of the source table, forwarded to `TableDiff`.
        target: Name of the target table, forwarded to `TableDiff`.
        source_alias: Alias for the source side, forwarded to `TableDiff`.
        target_alias: Alias for the target side, forwarded to `TableDiff`.
        limit: Forwarded to `TableDiff` unchanged.
        decimals: Forwarded to `TableDiff` unchanged.
        adapter: Engine adapter to diff with. A copy whose `execute_log_level` is set to
            this module logger's effective level is what `TableDiff` receives.
        on: Join columns or join condition. Must be non-empty.
        model: Optional model; when given, its name and dialect are passed along.
        skip_columns: Forwarded to `TableDiff` unchanged.
        where: Forwarded to `TableDiff` unchanged.
        schema_diff_ignore_case: Forwarded to `TableDiff` unchanged.

    Returns:
        The configured `TableDiff`.

    Raises:
        SQLMeshError: If `on` is missing or empty.
    """
    if on:
        diff_adapter = adapter.with_settings(execute_log_level=logger.getEffectiveLevel())
        # Without a model there is no name or dialect to attach to the diff.
        model_name, model_dialect = (model.name, model.dialect) if model else (None, None)
        return TableDiff(
            adapter=diff_adapter,
            source=source,
            target=target,
            on=on,
            skip_columns=skip_columns,
            where=where,
            source_alias=source_alias,
            target_alias=target_alias,
            limit=limit,
            decimals=decimals,
            model_name=model_name,
            model_dialect=model_dialect,
            schema_diff_ignore_case=schema_diff_ignore_case,
        )

    raise SQLMeshError(
        "SQLMesh doesn't know how to join the two tables. Specify the `grains` in each model definition or pass join column names in separate `-o` flags."
    )
@python_api_analytics
def create_test(
    self,
    model: str,
    input_queries: t.Dict[str, str],
    overwrite: bool = False,
    variables: t.Optional[t.Dict[str, str]] = None,
    path: t.Optional[str] = None,
    name: t.Optional[str] = None,
    include_ctes: bool = False,
) -> None:
    """Generate a unit test fixture for a given model.

    Args:
        model: The model to test.
        input_queries: Mapping of model names to queries. Each model included in this mapping
            will be populated in the test based on the results of the corresponding query.
        overwrite: Whether to overwrite the existing test in case of a file path collision.
            When set to False, an error will be raised if there is such a collision.
        variables: Key-value pairs that will define variables needed by the model.
        path: The file path corresponding to the fixture, relative to the test directory.
            By default, the fixture will be created under the test directory and the file name
            will be inferred from the test's name.
        name: The name of the test. This is inferred from the model name by default.
        include_ctes: When true, CTE fixtures will also be generated.
    """
    input_queries = {
        # The get_model here has two purposes: return normalized names & check for missing deps
        self.get_model(dep, raise_if_missing=True).fqn: query
        for dep, query in input_queries.items()
    }

    # Bind the name before entering the `try` block: if the model lookup or the adapter
    # creation fails, the `finally` clause must still be able to inspect it. Otherwise an
    # UnboundLocalError raised during cleanup would hide the original error.
    test_adapter = None
    try:
        model_to_test = self.get_model(model, raise_if_missing=True)
        test_adapter = self.test_connection_config.create_engine_adapter(
            register_comments_override=False
        )

        generate_test(
            model=model_to_test,
            input_queries=input_queries,
            models=self._models,
            engine_adapter=self._get_engine_adapter(model_to_test.gateway),
            test_engine_adapter=test_adapter,
            project_path=self.path,
            overwrite=overwrite,
            variables=variables,
            path=path,
            name=name,
            include_ctes=include_ctes,
        )
    finally:
        # Only close the test adapter if it was actually created.
        if test_adapter is not None:
            test_adapter.close()
@python_api_analytics
def audit(
    self,
    start: TimeLike,
    end: TimeLike,
    *,
    models: t.Optional[t.Iterator[str]] = None,
    execution_time: t.Optional[TimeLike] = None,
) -> bool:
    """Run audits for the given interval and report each outcome on the console.

    Args:
        start: The start of the interval to audit.
        end: The end of the interval to audit.
        models: The models to audit. Every snapshot of the context is audited when omitted.
        execution_time: The date/time reference to use for execution time.

    Returns:
        True when no audit failed, False otherwise.
    """
    if models:
        snapshots = [self.get_snapshot(name, raise_if_missing=True) for name in models]
    else:
        snapshots = self.snapshots.values()

    num_audits = sum(len(snapshot.node.audits_with_args) for snapshot in snapshots)
    self.console.log_status_update(f"Found {num_audits} audit(s).")

    failures = []
    skipped_count = 0
    for snapshot in snapshots:
        outcomes = self.snapshot_evaluator.audit(
            snapshot=snapshot,
            start=start,
            end=end,
            execution_time=execution_time,
            snapshots=self.snapshots,
        )
        for outcome in outcomes:
            audit_id = f"{outcome.audit.name}"
            if outcome.model:
                audit_id = f"{audit_id} on model {outcome.model.name}"

            # An audit is reported as skipped, failed (non-zero count) or passed, in that order.
            if outcome.skipped:
                skipped_count += 1
                verdict = f"{audit_id} ⏸️ SKIPPED."
            elif outcome.count:
                failures.append(outcome)
                verdict = f"{audit_id} ❌ [red]FAIL [{outcome.count}][/red]."
            else:
                verdict = f"{audit_id} ✅ [green]PASS[/green]."
            self.console.log_status_update(verdict)

    error_suffix = "" if len(failures) == 1 else "s"
    skipped_suffix = "" if skipped_count == 1 else "s"
    self.console.log_status_update(
        f"\nFinished with {len(failures)} audit error{error_suffix} "
        f"and {skipped_count} audit{skipped_suffix} skipped."
    )

    for failure in failures:
        self.console.log_status_update(
            f"\nFailure in audit {failure.audit.name} ({failure.audit._path})."
        )
        self.console.log_status_update(f"Got {failure.count} results, expected 0.")
        if failure.query:
            rendered = failure.query.sql(dialect=self.snapshot_evaluator.adapter.dialect)
            self.console.show_sql(f"{rendered}")

    self.console.log_status_update("Done.")
    return not failures
@python_api_analytics
def check_intervals(
    self,
    environment: t.Optional[str],
    no_signals: bool,
    select_models: t.Collection[str],
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
) -> t.Dict[Snapshot, SnapshotIntervals]:
    """Report the missing intervals of the snapshots in an environment.

    Args:
        environment: The environment to inspect, or prod if None.
        no_signals: When True the missing intervals are reported as computed. Otherwise they
            are first passed through each snapshot's `check_ready_intervals`.
        select_models: A list of model selection strings to show intervals for. When empty,
            every snapshot of the environment is included.
        start: The start of the intervals to check.
        end: The end of the intervals to check. Also used as the execution time.

    Returns:
        A mapping of each selected snapshot to its `SnapshotIntervals`.

    Raises:
        SQLMeshError: If the environment does not exist.
    """
    env_name = environment or c.PROD
    env = self.state_reader.get_environment(env_name)
    if not env:
        raise SQLMeshError(f"Environment '{env_name}' was not found.")

    stored = self.state_sync.get_snapshots(env.snapshots)
    snapshots = {snapshot_id.name: snapshot for snapshot_id, snapshot in stored.items()}

    missing_by_snapshot = missing_intervals(
        snapshots.values(), start=start, end=end, execution_time=end
    )
    missing = {key.name: intervals for key, intervals in missing_by_snapshot.items()}

    selected: t.Collection[str] = (
        self._select_models_for_run(select_models, True, snapshots.values())
        if select_models
        else snapshots.keys()
    )

    execution_context = self.execution_context(snapshots=snapshots)

    results = {}
    for fqn in selected:
        snapshot = snapshots[fqn]
        intervals = missing.get(fqn) or []
        if not no_signals:
            intervals = snapshot.check_ready_intervals(intervals, execution_context)
        results[snapshot] = SnapshotIntervals(snapshot.snapshot_id, intervals)

    return results
@python_api_analytics
def print_info(
    self, skip_connection: bool = False, verbosity: Verbosity = Verbosity.DEFAULT
) -> None:
    """Log a summary of the project and check its connections.

    The model and macro counts are always logged. Unless `skip_connection` is set, the data
    warehouse connection and, when one is configured, the state connection are then checked.

    Args:
        skip_connection: When True, stop after logging the counts.
        verbosity: At `Verbosity.VERBOSE` or above, the connection, test connection and
            state connection configs are printed before the connections are checked.
    """
    self.console.log_status_update(f"Models: {len(self.models)}")
    # Macros that come from the registry are excluded from the reported count.
    macro_count = len(self._macros) - len(macro.get_registry())
    self.console.log_status_update(f"Macros: {macro_count}")

    if skip_connection:
        return

    if verbosity >= Verbosity.VERBOSE:
        self.console.log_status_update("")
        sections = (
            (self.config.get_connection, "Connection"),
            (self.config.get_test_connection, "Test Connection"),
            (self.config.get_state_connection, "State Connection"),
        )
        for lookup, title in sections:
            print_config(lookup(self.gateway), self.console, title)

    self._try_connection("data warehouse", self.engine_adapter.ping)

    state_connection = self.config.get_state_connection(self.gateway)
    if state_connection:
        self._try_connection("state backend", state_connection.connection_validator())
def close(self) -> None:
    """Close the snapshot evaluator and the state sync, in that order, when they are set."""
    # Each attribute is read only when its turn comes, so the evaluator is fully closed
    # before the state sync is looked at.
    for attribute in ("_snapshot_evaluator", "_state_sync"):
        resource = getattr(self, attribute)
        if resource:
            resource.close()
@python_api_analytics
def table_name(
    self, model_name: str, environment: t.Optional[str] = None, prod: bool = False
) -> str:
    """Return the name of the physical table for the given model in the target environment.

    Args:
        model_name: The name of the model.
        environment: The environment to source the model version from. Falls back to the
            configured default target environment.
        prod: If True, return the name of the physical table that will be used in production
            for the model version promoted in the target environment.

    Returns:
        The name of the physical table.

    Raises:
        SQLMeshError: If the environment does not exist or does not contain the model.
    """
    env_name = environment or self.config.default_target_environment
    fqn = self._node_or_snapshot_to_fqn(model_name)

    target_env = self.state_reader.get_environment(env_name)
    if not target_env:
        raise SQLMeshError(f"Environment '{env_name}' was not found.")

    # First snapshot of the environment whose name matches the model's fully qualified name.
    snapshot_info = next(
        (candidate for candidate in target_env.snapshots if candidate.name == fqn), None
    )
    if not snapshot_info:
        raise SQLMeshError(f"Model '{model_name}' was not found in environment '{env_name}'.")

    if target_env.name == c.PROD or prod:
        return snapshot_info.table_name()

    env_snapshots = self.state_reader.get_snapshots(target_env.snapshots)
    deployability = DeployabilityIndex.create(env_snapshots)
    is_deployable = deployability.is_deployable(snapshot_info.snapshot_id)
    return snapshot_info.table_name(is_deployable=is_deployable)
def _run_plan_tests(self, skip_tests: bool = False) -> t.Optional[ModelTextTestResult]:
    """Run the model tests ahead of plan creation.

    Args:
        skip_tests: When True, no tests are run.

    Returns:
        The test result, or None when the tests were skipped.

    Raises:
        PlanError: If the test run was not successful.
    """
    if skip_tests:
        return None

    outcome = self.test()
    if outcome.wasSuccessful():
        return outcome

    raise PlanError("Cannot generate plan due to failing test(s). Fix test(s) and run again.")
@cached_property
def engine_adapters(self) -> t.Dict[str, EngineAdapter]:
    """Return one engine adapter per gateway name.

    The selected gateway maps to the context's own engine adapter. Every other gateway
    declared in the loaded configs gets an adapter created from its connection config; when
    several configs declare the same gateway name, the first one encountered is used.
    """
    by_gateway: t.Dict[str, EngineAdapter] = {self.selected_gateway: self.engine_adapter}

    declared_gateways = (
        (project_config, gateway_name)
        for project_config in self.configs.values()
        for gateway_name in project_config.gateways
    )
    for project_config, gateway_name in declared_gateways:
        if gateway_name in by_gateway:
            continue
        connection = project_config.get_connection(gateway_name)
        by_gateway[gateway_name] = connection.create_engine_adapter(
            concurrent_tasks=self.concurrent_tasks,
        )

    return by_gateway
def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter:
    """Return the engine adapter for `gateway`, or the default adapter when none is named.

    Args:
        gateway: Name of the gateway to look up. A missing or empty name selects the
            context's own engine adapter.

    Raises:
        SQLMeshError: If a gateway is named but has no entry in `engine_adapters`.
    """
    if not gateway:
        return self.engine_adapter

    adapter = self.engine_adapters.get(gateway)
    if adapter:
        return adapter

    raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.")
def _context_diff(
    self,
    environment: str,
    snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
    create_from: t.Optional[str] = None,
    force_no_diff: bool = False,
    ensure_finalized_snapshots: bool = False,
    diff_rendered: bool = False,
    always_recreate_environment: bool = False,
) -> ContextDiff:
    """Create the `ContextDiff` for an environment.

    Args:
        environment: Environment name; it is sanitized before use.
        snapshots: Snapshots to diff. Defaults to the context's own snapshots.
        create_from: Environment to create from. Defaults to prod.
        force_no_diff: When True, a no-diff `ContextDiff` is built from the state reader and
            the remaining arguments are not used.
        ensure_finalized_snapshots: Forwarded to `ContextDiff.create`.
        diff_rendered: Forwarded to `ContextDiff.create`.
        always_recreate_environment: Forwarded to `ContextDiff.create`.
    """
    sanitized = Environment.sanitize_name(environment)

    if not force_no_diff:
        diff_inputs = dict(
            snapshots=snapshots or self.snapshots,
            create_from=create_from or c.PROD,
            state_reader=self.state_reader,
            provided_requirements=self._requirements,
            excluded_requirements=self._excluded_requirements,
            ensure_finalized_snapshots=ensure_finalized_snapshots,
            diff_rendered=diff_rendered,
            environment_statements=self._environment_statements,
            gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer,
            infer_python_dependencies=self.config.infer_python_dependencies,
            always_recreate_environment=always_recreate_environment,
        )
        return ContextDiff.create(sanitized, **diff_inputs)

    return ContextDiff.create_no_diff(sanitized, self.state_reader)
def _try_connection(self, connection_name: str, validator: t.Callable[[], None]) -> None:
    """Run a connection validator and log whether it succeeded.

    Args:
        connection_name: Label for the connection; it is capitalized for the log message.
        validator: Zero-argument callable that raises when the connection is not usable.
    """
    label = connection_name.capitalize()
    try:
        validator()
        success_message = f"{label} connection [green]succeeded[/green]"
        # Logged inside the `try` on purpose: a failure while logging is reported the same
        # way as a failed validation.
        self.console.log_status_update(success_message)
    except Exception as ex:
        self.console.log_error(f"{label} connection failed. {ex}")
def _node_or_snapshot_to_fqn(self, node_or_snapshot: NodeOrSnapshot) -> str:
    """Resolve a snapshot, node or name to a fully qualified name.

    A snapshot yields its `name` and any other non-string object yields its `fqn`. A string
    that matches a standalone audit is returned unchanged; any other string is normalized
    with the context's default dialect and default catalog.
    """
    if isinstance(node_or_snapshot, Snapshot):
        return node_or_snapshot.name

    if not isinstance(node_or_snapshot, str):
        return node_or_snapshot.fqn

    # Names of standalone audits are used as given and are not normalized.
    if self.standalone_audits.get(node_or_snapshot):
        return node_or_snapshot

    return normalize_model_name(
        node_or_snapshot,
        dialect=self.default_dialect,
        default_catalog=self.default_catalog,
    )
3036 return self._project_type == c.NATIVE or self.engine_adapter.SUPPORTS_CLONING 3037 3038 def _get_plan_default_start_end( 3039 self, 3040 snapshots: t.Dict[str, Snapshot], 3041 max_interval_end_per_model: t.Dict[str, datetime], 3042 backfill_models: t.Optional[t.Set[str]], 3043 modified_model_names: t.Set[str], 3044 execution_time: t.Optional[TimeLike] = None, 3045 ) -> t.Tuple[t.Optional[int], t.Optional[int]]: 3046 if not max_interval_end_per_model: 3047 return None, None 3048 3049 default_end = to_timestamp(max(max_interval_end_per_model.values())) 3050 default_start: t.Optional[int] = None 3051 # Infer the default start by finding the smallest interval start that corresponds to the default end. 3052 for model_name in backfill_models or modified_model_names or max_interval_end_per_model: 3053 if model_name not in snapshots: 3054 continue 3055 node = snapshots[model_name].node 3056 interval_unit = node.interval_unit 3057 default_start = min( 3058 default_start or sys.maxsize, 3059 to_timestamp( 3060 interval_unit.cron_prev( 3061 interval_unit.cron_floor( 3062 max_interval_end_per_model.get( 3063 model_name, node.cron_floor(default_end) 3064 ), 3065 ), 3066 estimate=True, 3067 ) 3068 ), 3069 ) 3070 3071 if execution_time and to_timestamp(default_end) > to_timestamp(execution_time): 3072 # the end date can't be in the future, which can happen if a specific `execution_time` is set and prod intervals 3073 # are newer than it 3074 default_end = to_timestamp(execution_time) 3075 3076 return default_start, default_end 3077 3078 def _calculate_start_override_per_model( 3079 self, 3080 min_intervals: t.Optional[int], 3081 plan_start: t.Optional[TimeLike], 3082 plan_end: t.Optional[TimeLike], 3083 plan_execution_time: TimeLike, 3084 backfill_model_fqns: t.Optional[t.Set[str]], 3085 snapshots_by_model_fqn: t.Dict[str, Snapshot], 3086 end_override_per_model: t.Optional[t.Dict[str, datetime]], 3087 ) -> t.Dict[str, datetime]: 3088 if not min_intervals or not 
backfill_model_fqns or not plan_start: 3089 # If there are no models to backfill, there are no intervals to consider for backfill, so we dont need to consider a minimum number 3090 # If the plan doesnt have a start date, all intervals are considered already so we dont need to consider a minimum number 3091 # If we dont have a minimum number of intervals to consider, then we dont need to adjust the start date on a per-model basis 3092 return {} 3093 3094 start_overrides: t.Dict[str, datetime] = {} 3095 end_override_per_model = end_override_per_model or {} 3096 3097 plan_execution_time_dt = to_datetime(plan_execution_time) 3098 plan_start_dt = to_datetime(plan_start, relative_base=plan_execution_time_dt) 3099 plan_end_dt = to_datetime( 3100 plan_end or plan_execution_time_dt, relative_base=plan_execution_time_dt 3101 ) 3102 3103 # we need to take the DAG into account so that parent models can be expanded to cover at least as much as their children 3104 # for example, A(hourly) <- B(daily) 3105 # if min_intervals=1, A would have 1 hour and B would have 1 day 3106 # but B depends on A so in order for B to have 1 valid day, A needs to be expanded to 24 hours 3107 backfill_dag: DAG[str] = DAG() 3108 for fqn in backfill_model_fqns: 3109 backfill_dag.add( 3110 fqn, 3111 [ 3112 p.name 3113 for p in snapshots_by_model_fqn[fqn].parents 3114 if p.name in backfill_model_fqns 3115 ], 3116 ) 3117 3118 # start from the leaf nodes and work back towards the root because the min_start at the root node is determined by the calculated starts in the leaf nodes 3119 reversed_dag = backfill_dag.reversed 3120 graph = reversed_dag.graph 3121 3122 for model_fqn in reversed_dag: 3123 # Get the earliest start from all immediate children of this snapshot 3124 # this works because topological ordering guarantees that they've already been visited 3125 # and we always set a start override 3126 min_child_start = min( 3127 [start_overrides[immediate_child_fqn] for immediate_child_fqn in 
graph[model_fqn]], 3128 default=plan_start_dt, 3129 ) 3130 3131 snapshot = snapshots_by_model_fqn.get(model_fqn) 3132 3133 if not snapshot: 3134 continue 3135 3136 starting_point = end_override_per_model.get(model_fqn, plan_end_dt) 3137 if node_end := snapshot.node.end: 3138 # if we dont do this, if the node end is a *date* (as opposed to a timestamp) 3139 # we end up incorrectly winding back an extra day 3140 node_end_dt = make_exclusive(node_end) 3141 3142 if node_end_dt < plan_end_dt: 3143 # if the model has an end date that has already elapsed, use that as a starting point for calculating min_intervals 3144 # instead of the plan end. If we use the plan end, we will return intervals in the future which are invalid 3145 starting_point = node_end_dt 3146 3147 snapshot_start = snapshot.node.cron_floor(starting_point) 3148 3149 for _ in range(min_intervals): 3150 # wind back the starting point by :min_intervals intervals to arrive at the minimum snapshot start date 3151 snapshot_start = snapshot.node.cron_prev(snapshot_start) 3152 3153 start_overrides[model_fqn] = min(min_child_start, snapshot_start) 3154 3155 return start_overrides 3156 3157 def _get_max_interval_end_per_model( 3158 self, snapshots: t.Dict[str, Snapshot], backfill_models: t.Optional[t.Set[str]] 3159 ) -> t.Dict[str, datetime]: 3160 models_for_interval_end = ( 3161 self._get_models_for_interval_end(snapshots, backfill_models) 3162 if backfill_models is not None 3163 else None 3164 ) 3165 return { 3166 model_fqn: to_datetime(ts) 3167 for model_fqn, ts in self.state_sync.max_interval_end_per_model( 3168 c.PROD, 3169 models=models_for_interval_end, 3170 ensure_finalized_snapshots=self.config.plan.use_finalized_state, 3171 ).items() 3172 } 3173 3174 @staticmethod 3175 def _get_models_for_interval_end( 3176 snapshots: t.Dict[str, Snapshot], backfill_models: t.Set[str] 3177 ) -> t.Set[str]: 3178 models_for_interval_end = set() 3179 models_stack = list(backfill_models) 3180 while models_stack: 3181 
next_model = models_stack.pop() 3182 if next_model not in snapshots: 3183 continue 3184 models_for_interval_end.add(next_model) 3185 models_stack.extend( 3186 s.name 3187 for s in snapshots[next_model].parents 3188 if s.name not in models_for_interval_end 3189 ) 3190 return models_for_interval_end 3191 3192 def lint_models( 3193 self, 3194 models: t.Optional[t.Iterable[t.Union[str, Model]]] = None, 3195 raise_on_error: bool = True, 3196 ) -> t.List[AnnotatedRuleViolation]: 3197 found_error = False 3198 3199 model_list = ( 3200 list(self.get_model(model, raise_if_missing=True) for model in models) 3201 if models 3202 else self.models.values() 3203 ) 3204 all_violations = [] 3205 for model in model_list: 3206 # Linter may be `None` if the context is not loaded yet 3207 if linter := self._linters.get(model.project): 3208 lint_violation, violations = ( 3209 linter.lint_model(model, self, console=self.console) or found_error 3210 ) 3211 if lint_violation: 3212 found_error = True 3213 all_violations.extend(violations) 3214 3215 if raise_on_error and found_error: 3216 raise LinterError( 3217 "Linter detected errors in the code. Please fix them before proceeding." 
3218 ) 3219 3220 return all_violations 3221 3222 def select_tests( 3223 self, 3224 tests: t.Optional[t.List[str]] = None, 3225 patterns: t.Optional[t.List[str]] = None, 3226 ) -> t.List[ModelTestMetadata]: 3227 """Filter pre-loaded test metadata based on tests and patterns.""" 3228 3229 test_meta = self._model_test_metadata 3230 3231 if tests: 3232 filtered_tests = [] 3233 for test in tests: 3234 if "::" in test: 3235 if test in self._model_test_metadata_fully_qualified_name_index: 3236 filtered_tests.append( 3237 self._model_test_metadata_fully_qualified_name_index[test] 3238 ) 3239 else: 3240 test_path = Path(test) 3241 if test_path in self._model_test_metadata_path_index: 3242 filtered_tests.extend(self._model_test_metadata_path_index[test_path]) 3243 3244 test_meta = filtered_tests 3245 3246 if patterns: 3247 test_meta = filter_tests_by_patterns(test_meta, patterns) 3248 3249 return test_meta
Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.
Arguments:
- notification_targets: The notification targets to use. Defaults to what is defined in config.
- paths: The directories containing SQLMesh files.
- config: A Config object or the name of a Config object in config.py.
- connection: The name of the connection. If not specified the first connection as it appears in configuration will be used.
- test_connection: The name of the connection to use for tests. If not specified the first connection as it appears in configuration will be used.
- concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
- load: Whether or not to automatically load all models and macros (default True).
- console: The rich instance used for printing out CLI command results.
- users: A list of users to make known to SQLMesh.
def __init__(
    self,
    notification_targets: t.Optional[t.List[NotificationTarget]] = None,
    state_sync: t.Optional[StateSync] = None,
    paths: t.Union[str | Path, t.Iterable[str | Path]] = "",
    config: t.Optional[t.Union[C, str, t.Dict[Path, C]]] = None,
    gateway: t.Optional[str] = None,
    concurrent_tasks: t.Optional[int] = None,
    loader: t.Optional[t.Type[Loader]] = None,
    load: bool = True,
    users: t.Optional[t.List[User]] = None,
    config_loader_kwargs: t.Optional[t.Dict[str, t.Any]] = None,
    selector: t.Optional[t.Type[Selector]] = None,
):
    """Set up the context from configuration and optionally load the project.

    Args:
        notification_targets: Extra notification targets, prepended to the ones from config.
        state_sync: A state sync instance to keep for later use instead of creating one.
        paths: The project path(s) passed to `load_configs`.
        config: A config object, a config name, or a ready mapping of path to config.
            A mapping is used as-is; anything else goes through `load_configs`.
        gateway: The gateway name. Falls back to the config's default gateway name.
        concurrent_tasks: Stored as the context's concurrency setting.
        loader: Loader class used for every project instead of each config's own loader.
        load: Whether to call `load()` at the end of initialization.
        users: Extra users, merged with the ones from config and de-duplicated by username.
        config_loader_kwargs: Extra keyword arguments forwarded to `load_configs`.
        selector: Selector class to use. Defaults to `NativeSelector`.
    """
    self.configs = (
        config
        if isinstance(config, dict)
        else load_configs(config, self.CONFIG_TYPE, paths, **(config_loader_kwargs or {}))
    )
    self._projects = {config.project for config in self.configs.values()}
    self.dag: DAG[str] = DAG()
    self._models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
    self._audits: UniqueKeyDict[str, ModelAudit] = UniqueKeyDict("audits")
    self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict(
        "standaloneaudits"
    )
    self._model_test_metadata: t.List[ModelTestMetadata] = []
    self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {}
    self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {}
    self._models_with_tests: t.Set[str] = set()

    self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros")
    self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics")
    self._jinja_macros = JinjaMacroRegistry()
    self._requirements: t.Dict[str, str] = {}
    self._environment_statements: t.List[EnvironmentStatements] = []
    self._excluded_requirements: t.Set[str] = set()
    self._engine_adapter: t.Optional[EngineAdapter] = None
    self._linters: t.Dict[str, Linter] = {}
    self._loaded: bool = False
    self._selector_cls = selector or NativeSelector

    # The first configured project provides the context's primary path and config.
    self.path, self.config = t.cast(t.Tuple[Path, C], next(iter(self.configs.items())))

    self._all_dialects: t.Set[str] = {self.config.dialect or ""}

    if self.config.disable_anonymized_analytics:
        analytics.disable_analytics()

    self.gateway = gateway
    self._scheduler = self.config.get_scheduler(self.gateway)
    self.environment_ttl = self.config.environment_ttl
    self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments)
    self.auto_categorize_changes = self.config.plan.auto_categorize_changes
    self.selected_gateway = (gateway or self.config.default_gateway_name).lower()

    gw_model_defaults = self.config.get_gateway(self.selected_gateway).model_defaults
    if gw_model_defaults:
        # Merge global model defaults with the selected gateway's, if it's overridden
        global_defaults = self.config.model_defaults.model_dump(exclude_unset=True)
        gateway_defaults = gw_model_defaults.model_dump(exclude_unset=True)

        # Gateway values win over global ones because they are unpacked last.
        self.config.model_defaults = ModelDefaultsConfig(
            **{**global_defaults, **gateway_defaults}
        )

    # This allows overriding the default dialect's normalization strategy, so for example
    # one can do `dialect="duckdb,normalization_strategy=lowercase"` and this will be
    # applied to the DuckDB dialect globally
    if "normalization_strategy" in str(self.config.dialect):
        dialect = Dialect.get_or_raise(self.config.dialect)
        type(dialect).NORMALIZATION_STRATEGY = dialect.normalization_strategy

    # One loader per configured project; an explicit `loader` overrides every config's loader.
    self._loaders = [
        (loader or config.loader)(self, path, **config.loader_kwargs)
        for path, config in self.configs.items()
    ]

    self._concurrent_tasks = concurrent_tasks
    # Fall back to the main connection config when no state connection is configured.
    self._state_connection_config = (
        self.config.get_state_connection(self.gateway) or self.connection_config
    )

    self._snapshot_evaluator: t.Optional[SnapshotEvaluator] = None

    self.console = get_console()
    setattr(self.console, "dialect", self.config.dialect)

    self._provided_state_sync: t.Optional[StateSync] = state_sync
    self._state_sync: t.Optional[StateSync] = None

    # Should we dedupe notification_targets? If so how?
    self.notification_targets = (notification_targets or []) + self.config.notification_targets
    self.users = (users or []) + self.config.users
    # De-duplicate by username; a later entry replaces an earlier one with the same name.
    self.users = list({user.username: user for user in self.users}.values())
    self._register_notification_targets()

    if load:
        self.load()
The type of plan builder object to use (default: PlanBuilder).
@property
def snapshot_evaluator(self) -> SnapshotEvaluator:
    """Return the snapshot evaluator, creating and caching it on first access."""
    if self._snapshot_evaluator:
        return self._snapshot_evaluator

    # Every gateway's adapter is reconfigured to log executed statements at INFO level.
    adapters_by_gateway = {}
    for gateway_name, gateway_adapter in self.engine_adapters.items():
        adapters_by_gateway[gateway_name] = gateway_adapter.with_settings(
            execute_log_level=logging.INFO
        )

    self._snapshot_evaluator = SnapshotEvaluator(
        adapters_by_gateway,
        ddl_concurrent_tasks=self.concurrent_tasks,
        selected_gateway=self.selected_gateway,
    )
    return self._snapshot_evaluator
def execution_context(
    self,
    deployability_index: t.Optional[DeployabilityIndex] = None,
    engine_adapter: t.Optional[EngineAdapter] = None,
    snapshots: t.Optional[t.Dict[str, Snapshot]] = None,
) -> ExecutionContext:
    """Build an execution context bound to this context's defaults.

    Args:
        deployability_index: Passed through to the execution context unchanged.
        engine_adapter: The adapter to use. Falls back to the context's engine adapter.
        snapshots: The snapshots to expose. Falls back to the context's snapshots
            when omitted or empty.

    Returns:
        A new execution context.
    """
    adapter = engine_adapter if engine_adapter else self.engine_adapter
    visible_snapshots = snapshots if snapshots else self.snapshots
    return ExecutionContext(
        engine_adapter=adapter,
        snapshots=visible_snapshots,
        deployability_index=deployability_index,
        default_dialect=self.default_dialect,
        default_catalog=self.default_catalog,
    )
Returns an execution context.
@python_api_analytics
def upsert_model(self, model: t.Union[str, Model], **kwargs: t.Any) -> Model:
    """Apply updates to a model and register the result in the context.

    The context's models dictionary and DAG are updated to include the change, and
    model schemas are refreshed afterwards.

    Args:
        model: Model name or instance to update.
        kwargs: The field values to update the model with.

    Returns:
        A new instance of the updated or inserted model.

    Raises:
        SQLMeshError: If the model is disabled.
    """
    model = self.get_model(model, raise_if_missing=True)
    if not model.enabled:
        raise SQLMeshError(f"The disabled model '{model.name}' cannot be upserted")

    # Re-attach the original path to the updated copy.
    original_path = model._path
    model = model.copy(update=kwargs)
    model._path = original_path

    self.dag.add(model.fqn, model.depends_on)

    replacements = {model.fqn: model}
    # bust the fingerprint cache for all downstream models
    for downstream_fqn in self.dag.downstream(model.fqn):
        replacements[downstream_fqn] = self._models[downstream_fqn].copy()
    self._models.update(replacements)

    update_model_schemas(
        self.dag,
        models=self._models,
        cache_dir=self.cache_dir,
    )

    if model.dialect:
        self._all_dialects.add(model.dialect)

    model.validate_definition()

    return model
Update or insert a model.
The context's models dictionary will be updated to include these changes.
Arguments:
- model: Model name or instance to update.
- kwargs: The kwargs to update the model with.
Returns:
A new instance of the updated or inserted model.
def scheduler(
    self,
    environment: t.Optional[str] = None,
    snapshot_evaluator: t.Optional[SnapshotEvaluator] = None,
) -> Scheduler:
    """Return the built-in scheduler for an environment or for the local state.

    Args:
        environment: The target environment to source model snapshots from, or None
            if snapshots should be sourced from the currently loaded local state.
        snapshot_evaluator: The evaluator to hand to the scheduler. Falls back to the
            context's own snapshot evaluator.

    Returns:
        The built-in scheduler instance.

    Raises:
        ConfigError: If the environment does not exist or no snapshots were found.
    """
    snapshots: t.Iterable[Snapshot]
    if environment is None:
        snapshots = self.snapshots.values()
    else:
        stored_environment = self.state_sync.get_environment(environment)
        if stored_environment is None:
            raise ConfigError(f"Environment '{environment}' was not found.")
        snapshots = self.state_sync.get_snapshots(stored_environment.snapshots).values()

    if not snapshots:
        raise ConfigError("No models were found")

    evaluator = snapshot_evaluator or self.snapshot_evaluator
    return self.create_scheduler(snapshots, evaluator)
Returns the built-in scheduler.
Arguments:
- environment: The target environment to source model snapshots from, or None if snapshots should be sourced from the currently loaded local state.
Returns:
The built-in scheduler instance.
def create_scheduler(
    self, snapshots: t.Iterable[Snapshot], snapshot_evaluator: SnapshotEvaluator
) -> Scheduler:
    """Construct the built-in scheduler for the given snapshots.

    Args:
        snapshots: The snapshots to schedule.
        snapshot_evaluator: The evaluator the scheduler should use.

    Returns:
        The built-in scheduler instance, wired to this context's state sync, console,
        and notification target manager.
    """
    built_scheduler = Scheduler(
        snapshots,
        snapshot_evaluator,
        self.state_sync,
        default_catalog=self.default_catalog,
        max_workers=self.concurrent_tasks,
        console=self.console,
        notification_target_manager=self.notification_target_manager,
    )
    return built_scheduler
Creates the built-in scheduler.
Arguments:
- snapshots: The snapshots to schedule.
Returns:
The built-in scheduler instance.
@property
def state_sync(self) -> StateSync:
    """Return the state sync, creating it on first access.

    On creation, state whose schema version is 0 is migrated first. The versions
    are then read again and the instance is wrapped in a `CachingStateSync`.
    """
    if self._state_sync:
        return self._state_sync

    self._state_sync = self._new_state_sync()

    current_versions = self._state_sync.get_versions(validate=False)
    if current_versions.schema_version == 0:
        self.console.log_status_update("Initializing new project state...")
        self._state_sync.migrate()

    self._state_sync.get_versions()
    self._state_sync = CachingStateSync(self._state_sync)  # type: ignore
    return self._state_sync
def refresh(self) -> None:
    """Reload the project if any loader reports that a reload is needed."""
    for loader in self._loaders:
        if loader.reload_needed():
            # One stale loader is enough; the whole project is reloaded once.
            self.load()
            return
Refresh all models that have been updated.
def load(self, update_schemas: bool = True) -> GenericContext[C]:
    """Load all files in the context's path.

    Every loader is run, the context's registries are reset and repopulated from
    the loaded projects, and nodes of projects that are not loaded locally are
    pulled in from the prod environment in state.

    Args:
        update_schemas: Whether to update model schemas across the DAG and validate
            every model definition afterwards.

    Returns:
        This context.

    Raises:
        ConfigError: If a model and a standalone audit share the same name.
    """
    load_start_ts = time.perf_counter()

    # Run all loaders before clearing anything.
    loaded_projects = [loader.load() for loader in self._loaders]

    self.dag = DAG()
    self._standalone_audits.clear()
    self._audits.clear()
    self._macros.clear()
    self._models.clear()
    self._metrics.clear()
    self._requirements.clear()
    self._excluded_requirements.clear()
    self._linters.clear()
    self._environment_statements = []
    self._model_test_metadata.clear()
    self._model_test_metadata_path_index.clear()
    self._model_test_metadata_fully_qualified_name_index.clear()
    self._models_with_tests.clear()

    for loader, project in zip(self._loaders, loaded_projects):
        self._jinja_macros = self._jinja_macros.merge(project.jinja_macros)
        self._macros.update(project.macros)
        self._models.update(project.models)
        self._metrics.update(project.metrics)
        self._audits.update(project.audits)
        self._standalone_audits.update(project.standalone_audits)
        self._requirements.update(project.requirements)
        self._excluded_requirements.update(project.excluded_requirements)
        self._environment_statements.extend(project.environment_statements)

        # Index test metadata by file path and by fully qualified test name.
        self._model_test_metadata.extend(project.model_test_metadata)
        for metadata in project.model_test_metadata:
            self._model_test_metadata_path_index.setdefault(metadata.path, []).append(metadata)
            self._model_test_metadata_fully_qualified_name_index[
                metadata.fully_qualified_test_name
            ] = metadata
            self._models_with_tests.add(metadata.model_name)

        config = loader.config
        self._linters[config.project] = Linter.from_rules(
            BUILTIN_RULES.union(project.user_rules), config.linter
        )

    uncached = set()

    # The prod environment is looked up once and used for both the environment
    # statements and the snapshots of projects that are not in the current load.
    if any(self._projects):
        prod = self.state_reader.get_environment(c.PROD)
        if prod:
            # Load environment statements from state for projects not in current load
            for stmt in self.state_reader.get_environment_statements(c.PROD):
                if stmt.project and stmt.project not in self._projects:
                    self._environment_statements.append(stmt)

            for snapshot in self.state_reader.get_snapshots(prod.snapshots).values():
                if snapshot.node.project in self._projects:
                    uncached.add(snapshot.name)
                else:
                    store = self._standalone_audits if snapshot.is_audit else self._models
                    store[snapshot.name] = snapshot.node  # type: ignore

    for model in self._models.values():
        self.dag.add(model.fqn, model.depends_on)

    if update_schemas:
        for fqn in self.dag:
            model = self._models.get(fqn)  # type: ignore

            if not model or fqn in uncached:
                continue

            # make a copy of remote models that depend on local models or in the downstream chain
            # without this, a SELECT * FROM local will not propagate properly because the downstream
            # model will get mutated (schema changes) but the object is the same as the remote cache
            if any(dep in uncached for dep in model.depends_on):
                uncached.add(fqn)
                self._models.update({fqn: model.copy(update={"mapping_schema": {}})})

        update_model_schemas(
            self.dag,
            models=self._models,
            cache_dir=self.cache_dir,
        )

        models = self.models.values()
        for model in models:
            # The model definition can be validated correctly only after the schema is set.
            model.validate_definition()

    duplicates = set(self._models) & set(self._standalone_audits)
    if duplicates:
        raise ConfigError(
            f"Models and Standalone audits cannot have the same name: {duplicates}"
        )

    self._all_dialects = {m.dialect for m in self._models.values() if m.dialect} | {
        self.default_dialect or ""
    }

    analytics.collector.on_project_loaded(
        project_type=self._project_type,
        models_count=len(self._models),
        audits_count=len(self._audits),
        standalone_audits_count=len(self._standalone_audits),
        macros_count=len(self._macros),
        jinja_macros_count=len(self._jinja_macros.root_macros),
        load_time_sec=time.perf_counter() - load_start_ts,
        state_sync_fingerprint=self._scheduler.state_sync_fingerprint(self),
        project_name=self.config.project,
    )

    self._loaded = True
    return self
Load all files in the context's path.
@python_api_analytics
def run(
    self,
    environment: t.Optional[str] = None,
    *,
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    execution_time: t.Optional[TimeLike] = None,
    skip_janitor: bool = False,
    ignore_cron: bool = False,
    select_models: t.Optional[t.Collection[str]] = None,
    exit_on_env_update: t.Optional[int] = None,
    no_auto_upstream: bool = False,
) -> CompletionStatus:
    """Run the entire dag through the scheduler.

    Args:
        environment: The target environment to source model snapshots from and virtually update.
            Defaults to the configured default target environment.
        start: The start of the interval to render.
        end: The end of the interval to render.
        execution_time: The date/time reference to use for execution time. Defaults to now.
        skip_janitor: Whether to skip the janitor task.
        ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals.
        select_models: A list of model selection expressions to filter models that should run. Note that
            upstream dependencies of selected models will also be evaluated.
        exit_on_env_update: If set to a non-zero code, exits the process with that code when the run is
            interrupted by an update to the target environment. Otherwise the run is restarted.
        no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`.

    Returns:
        The completion status of the run.

    Raises:
        SQLMeshError: If the environment does not exist or is not finalized within the configured wait time.
    """
    environment = environment or self.config.default_target_environment
    environment = Environment.sanitize_name(environment)
    if not skip_janitor and environment.lower() == c.PROD:
        self._run_janitor()

    self.notification_target_manager.notify(
        NotificationEvent.RUN_START, environment=environment
    )
    analytics_run_id = analytics.collector.on_run_start(
        engine_type=self.snapshot_evaluator.adapter.dialect,
        state_sync_type=self.state_sync.state_type(),
    )
    self._load_materializations()

    env_check_attempts_num = max(
        1,
        self.config.run.environment_check_max_wait
        // self.config.run.environment_check_interval,
    )

    def _block_until_finalized() -> str:
        """Wait until the environment is finalized and return its plan id."""
        for _ in range(env_check_attempts_num):
            assert environment is not None  # mypy
            environment_state = self.state_sync.get_environment(environment)
            if not environment_state:
                raise SQLMeshError(f"Environment '{environment}' was not found.")
            if environment_state.finalized_ts:
                return environment_state.plan_id
            self.console.log_warning(
                f"Environment '{environment}' is being updated by plan '{environment_state.plan_id}'. "
                f"Retrying in {self.config.run.environment_check_interval} seconds..."
            )
            time.sleep(self.config.run.environment_check_interval)
        raise SQLMeshError(
            f"Exceeded the maximum wait time for environment '{environment}' to be ready. "
            "This means that the environment either failed to update or the update is taking longer than expected. "
            "See https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#run to adjust the timeout settings."
        )

    # Stays None when the run is interrupted before `_run` returns.
    completion_status: t.Optional[CompletionStatus] = None
    interrupted = False
    done = False
    while not done:
        plan_id_at_start = _block_until_finalized()

        def _has_environment_changed() -> bool:
            """Circuit breaker: true once the environment differs from the one the run started on."""
            assert environment is not None  # mypy
            current_environment_state = self.state_sync.get_environment(environment)
            return (
                not current_environment_state
                or current_environment_state.plan_id != plan_id_at_start
                or not current_environment_state.finalized_ts
            )

        try:
            completion_status = self._run(
                environment,
                start=start,
                end=end,
                execution_time=execution_time,
                ignore_cron=ignore_cron,
                select_models=select_models,
                circuit_breaker=_has_environment_changed,
                no_auto_upstream=no_auto_upstream,
            )
            done = True
        except CircuitBreakerError:
            self.console.log_warning(
                f"Environment '{environment}' modified while running. Restarting the run..."
            )
            # NOTE(review): this is a truthiness check, so an exit code of 0 restarts the
            # run instead of exiting, unlike the `is not None` check below. Confirm intent.
            if exit_on_env_update:
                interrupted = True
                done = True
        except Exception as e:
            self.notification_target_manager.notify(
                NotificationEvent.RUN_FAILURE, traceback.format_exc()
            )
            logger.info("Run failed.", exc_info=e)
            analytics.collector.on_run_end(
                run_id=analytics_run_id, succeeded=False, interrupted=False, error=e
            )
            raise

    # An interrupted run has no completion status, so guard before reading it.
    success = completion_status is not None and completion_status.is_success

    if success or interrupted:
        self.notification_target_manager.notify(
            NotificationEvent.RUN_END, environment=environment
        )
        self.console.log_success(f"Run finished for environment '{environment}'")
    elif completion_status is not None and completion_status.is_failure:
        self.notification_target_manager.notify(
            NotificationEvent.RUN_FAILURE, "See console logs for details."
        )

    analytics.collector.on_run_end(
        run_id=analytics_run_id, succeeded=success, interrupted=interrupted
    )

    if interrupted and exit_on_env_update is not None:
        sys.exit(exit_on_env_update)

    assert completion_status is not None  # mypy
    return completion_status
Run the entire dag through the scheduler.
Arguments:
- environment: The target environment to source model snapshots from and virtually update. Default: prod.
- start: The start of the interval to render.
- end: The end of the interval to render.
- execution_time: The date/time reference to use for execution time. Defaults to now.
- skip_janitor: Whether to skip the janitor task.
- ignore_cron: Whether to ignore the model's cron schedule and run all available missing intervals.
- select_models: A list of model selection expressions to filter models that should run. Note that upstream dependencies of selected models will also be evaluated.
- exit_on_env_update: If set, exits with the provided code if the run is interrupted by an update to the target environment.
- no_auto_upstream: Whether to not force upstream models to run. Only applicable when using `select_models`.
Returns:
The completion status of the run.
@python_api_analytics
def destroy(self) -> bool:
    """Destroy the project's resources after confirmation through the console.

    Collects the environment schemas and views and the snapshot tables of every
    non-symbolic model snapshot in every environment, then hands them to
    `console.start_destroy`. The destroy itself only runs if that call returns a
    truthy value.

    Returns:
        The result of the destroy, or False if it was not started.
    """
    success = False

    # Collect resources to be deleted
    environments = self.state_reader.get_environments()
    schemas_to_delete = set()
    tables_to_delete = set()
    views_to_delete = set()

    # For each environment find schemas and tables
    for environment in environments:
        snapshots = self.state_reader.get_snapshots(environment.snapshots).values()
        for snapshot in snapshots:
            if not snapshot.is_model or snapshot.is_symbolic:
                continue

            # Get the appropriate adapter
            if environment.gateway_managed and snapshot.model_gateway:
                adapter = self.engine_adapters.get(snapshot.model_gateway, self.engine_adapter)
            else:
                adapter = self.engine_adapter

            if environment.suffix_target.is_schema or environment.suffix_target.is_catalog:
                schema = snapshot.qualified_view_name.schema_for_environment(
                    environment.naming_info, dialect=adapter.dialect
                )
                catalog = snapshot.qualified_view_name.catalog_for_environment(
                    environment.naming_info, dialect=adapter.dialect
                )
                if catalog:
                    schemas_to_delete.add(f"{catalog}.{schema}")
                else:
                    schemas_to_delete.add(schema)

            if environment.suffix_target.is_table:
                view_name = snapshot.qualified_view_name.for_environment(
                    environment.naming_info, dialect=adapter.dialect
                )
                views_to_delete.add(view_name)

            # Add snapshot tables
            tables_to_delete.add(snapshot.table_name())

    if self.console.start_destroy(schemas_to_delete, views_to_delete, tables_to_delete):
        try:
            success = self._destroy()
        finally:
            # Always report the outcome, even if the destroy raised.
            self.console.stop_destroy(success=success)

    return success
def get_model(
    self, model_or_snapshot: ModelOrSnapshot, raise_if_missing: bool = False
) -> t.Optional[Model]:
    """Returns a model with the given name or None if a model with such name doesn't exist.

    Args:
        model_or_snapshot: A model name, model, or snapshot.
        raise_if_missing: Raises an error if a model is not found.

    Returns:
        The expected model. A snapshot yields its model, and a model instance is
        returned unchanged.

    Raises:
        SQLMeshError: If `raise_if_missing` is set and no model matches the name.
    """
    if isinstance(model_or_snapshot, Snapshot):
        return model_or_snapshot.model
    if not isinstance(model_or_snapshot, str):
        return model_or_snapshot

    # We should try all dialects referenced in the project for cases when models use mixed dialects.
    for dialect in self._all_dialects:
        try:
            normalized_name = normalize_model_name(
                model_or_snapshot,
                dialect=dialect,
                default_catalog=self.default_catalog,
            )
        except Exception:
            # Best effort: a name that can't be normalized in one dialect may still
            # resolve in another, so keep going instead of aborting the lookup.
            continue
        if normalized_name in self._models:
            return self._models[normalized_name]

    if raise_if_missing:
        if model_or_snapshot.endswith((".sql", ".py")):
            msg = "Resolving models by path is not supported, please pass in the model name instead."
        else:
            msg = f"Cannot find model with name '{model_or_snapshot}'"

        raise SQLMeshError(msg)

    return None
Returns a model with the given name or None if a model with such name doesn't exist.
Arguments:
- model_or_snapshot: A model name, model, or snapshot.
- raise_if_missing: Raises an error if a model is not found.
Returns:
The expected model.
def get_snapshot(
    self, node_or_snapshot: NodeOrSnapshot, raise_if_missing: bool = False
) -> t.Optional[Snapshot]:
    """Resolves a node name, node, or snapshot to a snapshot of this context.

    Args:
        node_or_snapshot: A node name, node, or snapshot. A snapshot is returned unchanged.
        raise_if_missing: Whether to raise instead of returning None when nothing is found.

    Returns:
        The matching snapshot, or None if there is none and `raise_if_missing` is not set.

    Raises:
        SQLMeshError: If there is no matching snapshot and `raise_if_missing` is set.
    """
    if isinstance(node_or_snapshot, Snapshot):
        return node_or_snapshot

    fqn = self._node_or_snapshot_to_fqn(node_or_snapshot)
    found = self.snapshots.get(fqn)

    if not raise_if_missing or found:
        return found

    raise SQLMeshError(f"Cannot find snapshot for '{fqn}'")
Returns a snapshot with the given name or None if a snapshot with such name doesn't exist.
Arguments:
- node_or_snapshot: A node name, node, or snapshot.
- raise_if_missing: Raises an error if a snapshot is not found.
Returns:
The expected snapshot.
def config_for_path(self, path: Path) -> t.Tuple[Config, Path]:
    """Finds the project that a file path belongs to.

    The registered configs are scanned in order and the first one whose path contains
    `path` wins. If none does, the context's own config and path are returned.

    Args:
        path: The file path to look up.

    Returns:
        A tuple of the matching config and its project path.
    """
    for project_path, project_config in self.configs.items():
        try:
            path.relative_to(project_path)
        except ValueError:
            # `path` is not located under this project, keep looking.
            continue
        return project_config, project_path

    return self.config, self.path
Returns the config and path of the said project for a given file path.
@property
def models(self) -> MappingProxyType[str, Model]:
    """A read-only mapping view over the models registered in this context."""
    registered_models = self._models
    return MappingProxyType(registered_models)
Returns all registered models in this context.
@property
def metrics(self) -> MappingProxyType[str, Metric]:
    """A read-only mapping view over the metrics registered in this context."""
    registered_metrics = self._metrics
    return MappingProxyType(registered_metrics)
Returns all registered metrics in this context.
@property
def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]:
    """A read-only mapping view over the standalone audits registered in this context."""
    registered_audits = self._standalone_audits
    return MappingProxyType(registered_audits)
Returns all registered standalone audits in this context.
@property
def models_with_tests(self) -> t.Set[str]:
    """The set of models in this context that have tests."""
    tested_models = self._models_with_tests
    return tested_models
Returns all models with tests in this context.
@property
def snapshots(self) -> t.Dict[str, Snapshot]:
    """Snapshots generated from the models registered in this context.

    Snapshots that were previously stored in the persisted state are returned as the
    stored instances.
    """
    generated = self._snapshots()
    return generated
Generates and returns snapshots based on models registered in this context.
If one of the snapshots has been previously stored in the persisted state, the stored instance will be returned.
@property
def requirements(self) -> t.Dict[str, str]:
    """A copy of the Python dependencies of the project loaded in this context."""
    requirements_copy = self._requirements.copy()
    return requirements_copy
Returns the Python dependencies of the project loaded in this context.
@python_api_analytics
def render(
    self,
    model_or_snapshot: ModelOrSnapshot,
    *,
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    execution_time: t.Optional[TimeLike] = None,
    expand: t.Union[bool, t.Iterable[str]] = False,
    **kwargs: t.Any,
) -> exp.Expression:
    """Renders the query of a model, optionally expanding the models it references.

    Args:
        model_or_snapshot: The model, model name, or snapshot to render.
        start: The start of the interval to render.
        end: The end of the interval to render.
        execution_time: The date/time reference to use for execution time. Defaults to now.
        expand: Controls which referenced models are expanded as raw queries. Defaults to False.
            If True, all upstream models are expanded.
            If an iterable of model names, the names are normalized and only those are expanded.
        **kwargs: Additional values forwarded to the model when rendering (e.g. macro variables).

    Returns:
        The rendered expression.
    """
    if not execution_time:
        execution_time = now()

    model = self.get_model(model_or_snapshot, raise_if_missing=True)

    if expand is True:
        expand = self.dag.upstream(model.fqn)
    elif expand:
        normalized_names = {
            normalize_model_name(
                name, default_catalog=self.default_catalog, dialect=self.default_dialect
            )
            for name in expand
        }
        expand = normalized_names or []
    else:
        expand = []

    if model.is_seed:
        import pandas as pd

        # Seed models render to dataframes; only the first one is converted to SQL.
        seed_context = self.execution_context(
            engine_adapter=self._get_engine_adapter(model.gateway)
        )
        rendered_frames = model.render(
            context=seed_context,
            start=start,
            end=end,
            execution_time=execution_time,
            **kwargs,
        )
        df = next(rendered_frames)
        return next(pandas_to_sql(t.cast(pd.DataFrame, df), model.columns_to_types))

    snapshots = self.snapshots
    deployability_index = DeployabilityIndex.create(snapshots.values(), start=start)

    return model.render_query_or_raise(
        start=start,
        end=end,
        execution_time=execution_time,
        snapshots=snapshots,
        expand=expand,
        deployability_index=deployability_index,
        engine_adapter=self._get_engine_adapter(model.gateway),
        **kwargs,
    )
Renders a model's query, expanding macros with provided kwargs, and optionally expanding referenced models.
Arguments:
- model_or_snapshot: The model, model name, or snapshot to render.
- start: The start of the interval to render.
- end: The end of the interval to render.
- execution_time: The date/time reference to use for execution time. Defaults to now.
- expand: Whether or not to expand materialized models, defaults to False. If True, all referenced models are expanded as raw queries. If a list, only the listed models are expanded as raw queries.
Returns:
The rendered expression.
@python_api_analytics
def evaluate(
    self,
    model_or_snapshot: ModelOrSnapshot,
    start: TimeLike,
    end: TimeLike,
    execution_time: TimeLike,
    limit: t.Optional[int] = None,
    **kwargs: t.Any,
) -> DF:
    """Evaluate a model or snapshot (running its query against a DB/Engine).

    This method is used to test or iterate on models without side effects.

    Args:
        model_or_snapshot: The model, model name, or snapshot to evaluate.
        start: The start of the interval to evaluate.
        end: The end of the interval to evaluate.
        execution_time: The date/time reference to use for execution time.
        limit: A limit applied to the model. A falsy value falls back to `c.DEFAULT_MAX_LIMIT`.
        **kwargs: Accepted for API compatibility; not used by this method.

    Returns:
        The dataframe produced by evaluating the snapshot.

    Raises:
        SQLMeshError: If no snapshot exists for the given model or snapshot.
        RuntimeError: If the evaluation did not produce a dataframe.
    """
    # Resolve the snapshots once so that lookup, expansion and evaluation share the same view.
    snapshots = self.snapshots
    fqn = self._node_or_snapshot_to_fqn(model_or_snapshot)
    if fqn not in snapshots:
        raise SQLMeshError(f"Cannot find snapshot for '{fqn}'")
    snapshot = snapshots[fqn]

    # Expand all uncategorized parents since physical tables don't exist for them yet
    expand = [
        parent
        for parent in self.dag.upstream(snapshot.model.fqn)
        if (parent_snapshot := snapshots.get(parent))
        and parent_snapshot.is_model
        and parent_snapshot.model.is_sql
        and not parent_snapshot.categorized
    ]

    df = self.snapshot_evaluator.evaluate_and_fetch(
        snapshot,
        start=start,
        end=end,
        execution_time=execution_time,
        snapshots=snapshots,
        limit=limit or c.DEFAULT_MAX_LIMIT,
        expand=expand,
    )

    if df is None:
        raise RuntimeError(f"Error evaluating {snapshot.name}")

    return df
Evaluate a model or snapshot (running its query against a DB/Engine).
This method is used to test or iterate on models without side effects.
Arguments:
- model_or_snapshot: The model, model name, or snapshot to evaluate.
- start: The start of the interval to evaluate.
- end: The end of the interval to evaluate.
- execution_time: The date/time reference to use for execution time.
- limit: A limit applied to the model.
@python_api_analytics
def format(
    self,
    transpile: t.Optional[str] = None,
    rewrite_casts: t.Optional[bool] = None,
    append_newline: t.Optional[bool] = None,
    *,
    check: t.Optional[bool] = None,
    paths: t.Optional[t.Tuple[t.Union[str, Path], ...]] = None,
    **kwargs: t.Any,
) -> bool:
    """Formats the SQL files backing the models and audits of this context.

    Only targets defined in `.sql` files are considered, and targets whose `formatting`
    attribute is False are skipped.

    Args:
        transpile: Forwarded to `self._format`.
        rewrite_casts: Forwarded to `self._format`.
        append_newline: Forwarded to `self._format`.
        check: If truthy, files are not modified; the ones whose formatted output differs
            from their current content are reported through the console instead.
        paths: If provided, only targets located at one of these paths are considered.
        **kwargs: Additional options forwarded to `self._format`.

    Returns:
        False if at least one file was reported as needing reformatting, True otherwise.
    """
    # The selection is completed before any file is touched.
    selected_targets = []
    for candidate in chain(self._models.values(), self._audits.values()):
        candidate_path = candidate._path
        if candidate_path is None or candidate_path.suffix != ".sql":
            continue
        if paths and not any(candidate_path.samefile(p) for p in paths):
            continue
        selected_targets.append(candidate)

    needs_reformatting = []

    for target in selected_targets:
        # The None check is repeated here to satisfy the type checker.
        if target._path is None or target.formatting is False:
            continue

        with open(target._path, "r+", encoding="utf-8") as file:
            before = file.read()

            after = self._format(
                target,
                before,
                transpile=transpile,
                rewrite_casts=rewrite_casts,
                append_newline=append_newline,
                **kwargs,
            )

            if check:
                if before != after:
                    needs_reformatting.append(target._path)
            else:
                file.seek(0)
                file.write(after)
                file.truncate()

    if not needs_reformatting:
        return True

    for path in needs_reformatting:
        self.console.log_status_update(f"{path} needs reformatting.")
    self.console.log_status_update(f"\n{len(needs_reformatting)} file(s) need reformatting.")
    return False
Format all SQL models and audits.
@python_api_analytics
def plan(
    self,
    environment: t.Optional[str] = None,
    *,
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    execution_time: t.Optional[TimeLike] = None,
    create_from: t.Optional[str] = None,
    skip_tests: t.Optional[bool] = None,
    restate_models: t.Optional[t.Iterable[str]] = None,
    no_gaps: t.Optional[bool] = None,
    skip_backfill: t.Optional[bool] = None,
    empty_backfill: t.Optional[bool] = None,
    forward_only: t.Optional[bool] = None,
    allow_destructive_models: t.Optional[t.Collection[str]] = None,
    allow_additive_models: t.Optional[t.Collection[str]] = None,
    no_prompts: t.Optional[bool] = None,
    auto_apply: t.Optional[bool] = None,
    no_auto_categorization: t.Optional[bool] = None,
    effective_from: t.Optional[TimeLike] = None,
    include_unmodified: t.Optional[bool] = None,
    select_models: t.Optional[t.Collection[str]] = None,
    backfill_models: t.Optional[t.Collection[str]] = None,
    categorizer_config: t.Optional[CategorizerConfig] = None,
    enable_preview: t.Optional[bool] = None,
    no_diff: t.Optional[bool] = None,
    run: t.Optional[bool] = None,
    diff_rendered: t.Optional[bool] = None,
    skip_linter: t.Optional[bool] = None,
    explain: t.Optional[bool] = None,
    ignore_cron: t.Optional[bool] = None,
    min_intervals: t.Optional[int] = None,
) -> Plan:
    """Builds a plan for the target environment and presents it through the console.

    The current context is compared with the target environment. The differences are then
    shown and the console asks whether to backfill each modified model.

    Args:
        environment: The environment to diff and plan against.
        start: The start date of the backfill if there is one.
        end: The end date of the backfill if there is one.
        execution_time: The date/time reference to use for execution time. Defaults to now.
        create_from: The environment to create the target environment from if it
            doesn't exist. If not specified, the "prod" environment will be used.
        skip_tests: Unit tests are run by default so this will skip them if enabled.
        restate_models: A list of either internal or external models, or tags, that need to be restated
            for the given plan interval. If the target environment is a production environment,
            ALL snapshots that depended on these upstream tables will have their intervals deleted
            (even ones not in this current environment). Only the snapshots in this environment will
            be backfilled whereas others need to be recovered on a future plan application. For development
            environments only snapshots that are part of this plan will be affected.
        no_gaps: Whether to ensure that new snapshots for models that are already a
            part of the target environment have no data gaps when compared against previous
            snapshots for same models.
        skip_backfill: Whether to skip the backfill step. Default: False.
        empty_backfill: Like skip_backfill, but also records processed intervals.
        forward_only: Whether the purpose of the plan is to make forward only changes.
        allow_destructive_models: Models whose forward-only changes are allowed to be destructive.
        allow_additive_models: Models whose forward-only changes are allowed to be additive.
        no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that
            if this flag is set to true and there are uncategorized changes the plan creation will
            fail. Prompts are always enabled when auto categorization is disabled or the plan has
            uncategorized changes. If not provided, the plan configuration determines the behavior.
        auto_apply: Whether to automatically apply the new plan after creation. Always enabled
            when `explain` is set. If not provided, the plan configuration determines the behavior.
        no_auto_categorization: Indicates whether to disable automatic categorization of model
            changes (breaking / non-breaking). If not provided, then the corresponding configuration
            option determines the behavior.
        effective_from: The effective date from which to apply forward-only changes on production.
        include_unmodified: Indicates whether to include unmodified models in the target development environment.
        select_models: A list of model selection strings to filter the models that should be included into this plan.
        backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
        categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the
            project config by default.
        enable_preview: Indicates whether to enable preview for forward-only models in development environments.
        no_diff: Hide text differences for changed models. If not provided, the plan configuration
            determines the behavior.
        run: Whether to run latest intervals as part of the plan application.
        diff_rendered: Whether the diff should compare raw vs rendered models.
        skip_linter: Linter runs by default so this will skip it if enabled.
        explain: Whether to explain the plan instead of applying it.
        ignore_cron: Forwarded to the plan builder together with `run`.
        min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many
            intervals are covered on every model when checking for missing intervals.

    Returns:
        The populated Plan object.
    """
    builder_options: t.Dict[str, t.Any] = {
        "start": start,
        "end": end,
        "execution_time": execution_time,
        "create_from": create_from,
        "skip_tests": skip_tests,
        "restate_models": restate_models,
        "no_gaps": no_gaps,
        "skip_backfill": skip_backfill,
        "empty_backfill": empty_backfill,
        "forward_only": forward_only,
        "allow_destructive_models": allow_destructive_models,
        "allow_additive_models": allow_additive_models,
        "no_auto_categorization": no_auto_categorization,
        "effective_from": effective_from,
        "include_unmodified": include_unmodified,
        "select_models": select_models,
        "backfill_models": backfill_models,
        "categorizer_config": categorizer_config,
        "enable_preview": enable_preview,
        "run": run,
        "diff_rendered": diff_rendered,
        "skip_linter": skip_linter,
        "explain": explain,
        "ignore_cron": ignore_cron,
        "min_intervals": min_intervals,
    }
    plan_builder = self.plan_builder(environment, **builder_options)

    plan = plan_builder.build()

    # Prompts are required if the auto categorization is disabled
    # or if there are any uncategorized snapshots in the plan
    if no_auto_categorization or plan.uncategorized:
        no_prompts = False

    if explain:
        auto_apply = True

    # Anything the caller left unset falls back to the plan configuration.
    if auto_apply is None:
        auto_apply = self.config.plan.auto_apply
    default_catalog = self.default_catalog
    if no_diff is None:
        no_diff = self.config.plan.no_diff
    if no_prompts is None:
        no_prompts = self.config.plan.no_prompts

    self.console.plan(
        plan_builder,
        auto_apply,
        default_catalog,
        no_diff=no_diff,
        no_prompts=no_prompts,
    )

    return plan
Interactively creates a plan.
This method compares the current context with the target environment. It then presents the differences and asks whether to backfill each modified model.
Arguments:
- environment: The environment to diff and plan against.
- start: The start date of the backfill if there is one.
- end: The end date of the backfill if there is one.
- execution_time: The date/time reference to use for execution time. Defaults to now.
- create_from: The environment to create the target environment from if it doesn't exist. If not specified, the "prod" environment will be used.
- skip_tests: Unit tests are run by default so this will skip them if enabled
- restate_models: A list of either internal or external models, or tags, that need to be restated for the given plan interval. If the target environment is a production environment, ALL snapshots that depended on these upstream tables will have their intervals deleted (even ones not in this current environment). Only the snapshots in this environment will be backfilled whereas others need to be recovered on a future plan application. For development environments only snapshots that are part of this plan will be affected.
- no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for same models.
- skip_backfill: Whether to skip the backfill step. Default: False.
- empty_backfill: Like skip_backfill, but also records processed intervals.
- forward_only: Whether the purpose of the plan is to make forward only changes.
- allow_destructive_models: Models whose forward-only changes are allowed to be destructive.
- allow_additive_models: Models whose forward-only changes are allowed to be additive.
- no_prompts: Whether to disable interactive prompts for the backfill time range. Please note that if this flag is set to true and there are uncategorized changes the plan creation will fail. Default: False.
- auto_apply: Whether to automatically apply the new plan after creation. Default: False.
- no_auto_categorization: Indicates whether to disable automatic categorization of model changes (breaking / non-breaking). If not provided, then the corresponding configuration option determines the behavior.
- categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the project config by default.
- effective_from: The effective date from which to apply forward-only changes on production.
- include_unmodified: Indicates whether to include unmodified models in the target development environment.
- select_models: A list of model selection strings to filter the models that should be included into this plan.
- backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
- enable_preview: Indicates whether to enable preview for forward-only models in development environments.
- no_diff: Hide text differences for changed models.
- run: Whether to run latest intervals as part of the plan application.
- diff_rendered: Whether the diff should compare raw vs rendered models
- skip_linter: Linter runs by default so this will skip it if enabled
- explain: Whether to explain the plan instead of applying it.
- min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered on every model when checking for missing intervals
Returns:
The populated Plan object.
@python_api_analytics
def plan_builder(
    self,
    environment: t.Optional[str] = None,
    *,
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    execution_time: t.Optional[TimeLike] = None,
    create_from: t.Optional[str] = None,
    skip_tests: t.Optional[bool] = None,
    restate_models: t.Optional[t.Iterable[str]] = None,
    no_gaps: t.Optional[bool] = None,
    skip_backfill: t.Optional[bool] = None,
    empty_backfill: t.Optional[bool] = None,
    forward_only: t.Optional[bool] = None,
    allow_destructive_models: t.Optional[t.Collection[str]] = None,
    allow_additive_models: t.Optional[t.Collection[str]] = None,
    no_auto_categorization: t.Optional[bool] = None,
    effective_from: t.Optional[TimeLike] = None,
    include_unmodified: t.Optional[bool] = None,
    select_models: t.Optional[t.Collection[str]] = None,
    backfill_models: t.Optional[t.Collection[str]] = None,
    categorizer_config: t.Optional[CategorizerConfig] = None,
    enable_preview: t.Optional[bool] = None,
    run: t.Optional[bool] = None,
    diff_rendered: t.Optional[bool] = None,
    skip_linter: t.Optional[bool] = None,
    explain: t.Optional[bool] = None,
    ignore_cron: t.Optional[bool] = None,
    min_intervals: t.Optional[int] = None,
    always_include_local_changes: t.Optional[bool] = None,
) -> PlanBuilder:
    """Creates a plan builder.

    Before the builder is created, the models are linted (unless `skip_linter` is set) and
    `self._run_plan_tests` is invoked. The model selections are then expanded and the context
    is diffed against the target environment.

    Args:
        environment: The environment to diff and plan against. Falls back to the configured
            default target environment.
        start: The start date of the backfill if there is one.
        end: The end date of the backfill if there is one.
        execution_time: The date/time reference to use for execution time. Defaults to now.
        create_from: The environment to create the target environment from if it
            doesn't exist. If not specified, the "prod" environment will be used.
        skip_tests: Unit tests are run by default so this will skip them if enabled
        restate_models: A list of either internal or external models, or tags, that need to be restated
            for the given plan interval. If the target environment is a production environment,
            ALL snapshots that depended on these upstream tables will have their intervals deleted
            (even ones not in this current environment). Only the snapshots in this environment will
            be backfilled whereas others need to be recovered on a future plan application. For development
            environments only snapshots that are part of this plan will be affected.
        no_gaps: Whether to ensure that new snapshots for models that are already a
            part of the target environment have no data gaps when compared against previous
            snapshots for same models.
        skip_backfill: Whether to skip the backfill step. Default: False.
        empty_backfill: Like skip_backfill, but also records processed intervals.
        forward_only: Whether the purpose of the plan is to make forward only changes. Forced to True
            when the virtual environment mode is not full; otherwise defaults to the plan configuration.
        allow_destructive_models: Models whose forward-only changes are allowed to be destructive.
        allow_additive_models: Models whose forward-only changes are allowed to be additive.
        no_auto_categorization: Indicates whether to disable automatic categorization of model
            changes (breaking / non-breaking). If not provided, then the corresponding configuration
            option determines the behavior.
        categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the
            project config by default.
        effective_from: The effective date from which to apply forward-only changes on production.
        include_unmodified: Indicates whether to include unmodified models in the target development environment.
        select_models: A list of model selection strings to filter the models that should be included into this plan.
        backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
        enable_preview: Indicates whether to enable preview for forward-only models in development environments.
        run: Whether to run latest intervals as part of the plan application.
        diff_rendered: Whether the diff should compare raw vs rendered models
        skip_linter: Linter runs by default so this will skip it if enabled
        explain: Whether to explain the plan instead of applying it. When set, unit tests are skipped as well.
        ignore_cron: Forwarded to the plan builder. It is reset to False unless `run` is set.
        min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered
            on every model when checking for missing intervals
        always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored.
            However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour.

    Returns:
        The plan builder.

    Raises:
        PlanError: If selecting models fails or a selector does not match any models.
    """
    # Record the flags exactly as passed by the caller, before any defaults are applied below.
    # NOTE(review): `explain`, `ignore_cron`, `categorizer_config` and `always_include_local_changes`
    # are not recorded here - confirm this is intentional.
    kwargs: t.Dict[str, t.Optional[UserProvidedFlags]] = {
        "start": start,
        "end": end,
        "execution_time": execution_time,
        "create_from": create_from,
        "skip_tests": skip_tests,
        "restate_models": list(restate_models) if restate_models is not None else None,
        "no_gaps": no_gaps,
        "skip_backfill": skip_backfill,
        "empty_backfill": empty_backfill,
        "forward_only": forward_only,
        "allow_destructive_models": list(allow_destructive_models)
        if allow_destructive_models is not None
        else None,
        "allow_additive_models": list(allow_additive_models)
        if allow_additive_models is not None
        else None,
        "no_auto_categorization": no_auto_categorization,
        "effective_from": effective_from,
        "include_unmodified": include_unmodified,
        "select_models": list(select_models) if select_models is not None else None,
        "backfill_models": list(backfill_models) if backfill_models is not None else None,
        "enable_preview": enable_preview,
        "run": run,
        "diff_rendered": diff_rendered,
        "skip_linter": skip_linter,
        "min_intervals": min_intervals,
    }
    user_provided_flags: t.Dict[str, UserProvidedFlags] = {
        k: v for k, v in kwargs.items() if v is not None
    }

    # Normalize the optional boolean flags; explaining a plan implies skipping the tests.
    skip_tests = explain or skip_tests or False
    no_gaps = no_gaps or False
    skip_backfill = skip_backfill or False
    empty_backfill = empty_backfill or False
    run = run or False
    diff_rendered = diff_rendered or False
    skip_linter = skip_linter or False

    environment = environment or self.config.default_target_environment
    environment = Environment.sanitize_name(environment)
    is_dev = environment != c.PROD

    if include_unmodified is None:
        include_unmodified = self.config.plan.include_unmodified

    if skip_backfill and not no_gaps and not is_dev:
        # note: we deliberately don't mention the --no-gaps flag in case the plan came from the sqlmesh_dbt command
        # todo: perhaps we could have better error messages if we check sys.argv[0] for which cli is running?
        self.console.log_warning(
            "Skipping the backfill stage for production can lead to unexpected results, such as tables being empty or incremental data with non-contiguous time ranges being made available.\n"
            "If you are doing this deliberately to create an empty version of a table to test a change, please consider using Virtual Data Environments instead."
        )

    if not skip_linter:
        self.lint_models()

    self._run_plan_tests(skip_tests=skip_tests)

    # Pinned environments get no TTL.
    environment_ttl = (
        self.environment_ttl if environment not in self.pinned_environments else None
    )

    model_selector = self._new_selector()

    # Expand the selection expressions; empty selections are normalized to None.
    if allow_destructive_models:
        expanded_destructive_models = model_selector.expand_model_selections(
            allow_destructive_models
        )
    else:
        expanded_destructive_models = None

    if allow_additive_models:
        expanded_additive_models = model_selector.expand_model_selections(allow_additive_models)
    else:
        expanded_additive_models = None

    if backfill_models:
        backfill_models = model_selector.expand_model_selections(backfill_models)
    else:
        backfill_models = None

    models_override: t.Optional[UniqueKeyDict[str, Model]] = None
    if select_models:
        try:
            models_override = model_selector.select_models(
                select_models,
                environment,
                fallback_env_name=create_from or c.PROD,
                ensure_finalized_snapshots=self.config.plan.use_finalized_state,
            )
        except SQLMeshError as e:
            logger.exception(e)  # ensure the full stack trace is logged
            raise PlanError(
                f"{e}\nCheck the SQLMesh log file for the full stack trace.\nIf the model has been fixed locally, please ensure that the --select-model expression includes it."
            )
        if not backfill_models:
            # Only backfill selected models unless explicitly specified.
            backfill_models = model_selector.expand_model_selections(select_models)

    expanded_restate_models = None
    if restate_models is not None:
        expanded_restate_models = model_selector.expand_model_selections(restate_models)

    # A selection that was requested but matched nothing is an error.
    if (restate_models is not None and not expanded_restate_models) or (
        backfill_models is not None and not backfill_models
    ):
        raise PlanError(
            "Selector did not return any models. Please check your model selection and try again."
        )

    if always_include_local_changes is None:
        # default behaviour - if restatements are detected; we operate entirely out of state and ignore local changes
        # NOTE(review): the `backfill_models` operand looks unreachable here, since the check
        # above already raises when it is an empty (non-None) selection - confirm.
        force_no_diff = restate_models is not None or (
            backfill_models is not None and not backfill_models
        )
    else:
        force_no_diff = not always_include_local_changes

    snapshots = self._snapshots(models_override)
    context_diff = self._context_diff(
        environment or c.PROD,
        snapshots=snapshots,
        create_from=create_from,
        force_no_diff=force_no_diff,
        ensure_finalized_snapshots=self.config.plan.use_finalized_state,
        diff_rendered=diff_rendered,
        always_recreate_environment=self.config.plan.always_recreate_environment,
    )
    modified_model_names = {
        *context_diff.modified_snapshots,
        *[s.name for s in context_diff.added],
    }

    if (
        is_dev
        and not include_unmodified
        and backfill_models is None
        and expanded_restate_models is None
    ):
        # Only backfill modified and added models.
        # This ensures that no models outside the impacted sub-DAG(s) will be backfilled unexpectedly.
        backfill_models = modified_model_names or None

    max_interval_end_per_model = None
    default_start, default_end = None, None
    if not run:
        # `ignore_cron` only applies when the plan also runs the latest intervals.
        ignore_cron = False
        max_interval_end_per_model = self._get_max_interval_end_per_model(
            snapshots, backfill_models
        )
        # If no end date is specified, use the max interval end from prod
        # to prevent unintended evaluation of the entire DAG.
        default_start, default_end = self._get_plan_default_start_end(
            snapshots,
            max_interval_end_per_model,
            backfill_models,
            modified_model_names,
            execution_time or now(),
        )

        # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model.
        self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values())

    start_override_per_model = self._calculate_start_override_per_model(
        min_intervals,
        start or default_start,
        end or default_end,
        execution_time or now(),
        backfill_models,
        snapshots,
        max_interval_end_per_model,
    )

    # Anything other than the full virtual environment mode forces forward-only plans.
    if not self.config.virtual_environment_mode.is_full:
        forward_only = True
    elif forward_only is None:
        forward_only = self.config.plan.forward_only

    # When handling prod restatements, only clear intervals from other model versions if we are using full virtual environments
    # If we are not, then there is no point, because none of the data in dev environments can be promoted by definition
    restate_all_snapshots = (
        expanded_restate_models is not None
        and not is_dev
        and self.config.virtual_environment_mode.is_full
    )

    return self.PLAN_BUILDER_TYPE(
        context_diff=context_diff,
        start=start,
        end=end,
        execution_time=execution_time,
        apply=self.apply,
        restate_models=expanded_restate_models,
        restate_all_snapshots=restate_all_snapshots,
        backfill_models=backfill_models,
        no_gaps=no_gaps,
        skip_backfill=skip_backfill,
        empty_backfill=empty_backfill,
        is_dev=is_dev,
        forward_only=forward_only,
        allow_destructive_models=expanded_destructive_models,
        allow_additive_models=expanded_additive_models,
        environment_ttl=environment_ttl,
        environment_suffix_target=self.config.environment_suffix_target,
        environment_catalog_mapping=self.environment_catalog_mapping,
        categorizer_config=categorizer_config or self.auto_categorize_changes,
        auto_categorization_enabled=not no_auto_categorization,
        effective_from=effective_from,
        include_unmodified=include_unmodified,
        default_start=default_start,
        default_end=default_end,
        enable_preview=(
            enable_preview if enable_preview is not None else self._plan_preview_enabled
        ),
        end_bounded=not run,
        ensure_finalized_snapshots=self.config.plan.use_finalized_state,
        start_override_per_model=start_override_per_model,
        end_override_per_model=max_interval_end_per_model,
        console=self.console,
        user_provided_flags=user_provided_flags,
        # Only models whose node carries a dbt unique id end up in this set.
        selected_models={
            dbt_unique_id
            for model in model_selector.expand_model_selections(select_models or "*")
            if (dbt_unique_id := snapshots[model].node.dbt_unique_id)
        },
        explain=explain or False,
        ignore_cron=ignore_cron or False,
    )
Creates a plan builder.
Arguments:
- environment: The environment to diff and plan against.
- start: The start date of the backfill if there is one.
- end: The end date of the backfill if there is one.
- execution_time: The date/time reference to use for execution time. Defaults to now.
- create_from: The environment to create the target environment from if it doesn't exist. If not specified, the "prod" environment will be used.
- skip_tests: Unit tests are run by default so this will skip them if enabled
- restate_models: A list of either internal or external models, or tags, that need to be restated for the given plan interval. If the target environment is a production environment, ALL snapshots that depended on these upstream tables will have their intervals deleted (even ones not in this current environment). Only the snapshots in this environment will be backfilled whereas others need to be recovered on a future plan application. For development environments only snapshots that are part of this plan will be affected.
- no_gaps: Whether to ensure that new snapshots for models that are already a part of the target environment have no data gaps when compared against previous snapshots for same models.
- skip_backfill: Whether to skip the backfill step. Default: False.
- empty_backfill: Like skip_backfill, but also records processed intervals.
- forward_only: Whether the purpose of the plan is to make forward only changes.
- allow_destructive_models: Models whose forward-only changes are allowed to be destructive.
- no_auto_categorization: Indicates whether to disable automatic categorization of model changes (breaking / non-breaking). If not provided, then the corresponding configuration option determines the behavior.
- categorizer_config: The configuration for the categorizer. Uses the categorizer configuration defined in the project config by default.
- effective_from: The effective date from which to apply forward-only changes on production.
- include_unmodified: Indicates whether to include unmodified models in the target development environment.
- select_models: A list of model selection strings to filter the models that should be included into this plan.
- backfill_models: A list of model selection strings to filter the models for which the data should be backfilled.
- enable_preview: Indicates whether to enable preview for forward-only models in development environments.
- run: Whether to run latest intervals as part of the plan application.
- diff_rendered: Whether the diff should compare raw vs rendered models
- min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered on every model when checking for missing intervals
- always_include_local_changes: Usually when restatements are present, local changes in the filesystem are ignored. However, it can be desirable to deploy changes + restatements in the same plan, so this flag overrides the default behaviour.
Returns:
The plan builder.
def apply(
    self,
    plan: Plan,
    circuit_breaker: t.Optional[t.Callable[[], bool]] = None,
) -> None:
    """Push a plan's snapshots into state and backfill its models.

    A plan that has no changes, needs no backfill and has no unmodified-but-unpromoted
    models is a no-op. A plan flagged with ``explain`` is only handed to a
    ``PlanExplainer`` and is not applied. Otherwise notification targets are told
    about the start and the end (or the failure) of the application.

    Args:
        plan: The plan to apply.
        circuit_breaker: An optional handler which checks if the apply should be aborted.

    Raises:
        UncategorizedPlanError: If the plan still has uncategorized changes.
    """
    has_work = (
        plan.context_diff.has_changes
        or plan.requires_backfill
        or plan.has_unmodified_unpromoted
    )
    if not has_work:
        return

    if plan.uncategorized:
        raise UncategorizedPlanError("Can't apply a plan with uncategorized changes.")

    if plan.explain:
        PlanExplainer(
            state_reader=self.state_reader,
            default_catalog=self.default_catalog,
            console=self.console,
        ).evaluate(plan.to_evaluatable())
        return

    def _notify(event: NotificationEvent, **extra: t.Any) -> None:
        # Every apply notification carries the same environment / plan identifiers.
        self.notification_target_manager.notify(
            event,
            environment=plan.environment_naming_info.name,
            plan_id=plan.plan_id,
            **extra,
        )

    _notify(NotificationEvent.APPLY_START)
    try:
        self._apply(plan, circuit_breaker)
    except Exception as err:
        # The traceback is captured while the exception is still being handled.
        _notify(NotificationEvent.APPLY_FAILURE, exc=traceback.format_exc())
        logger.info("Plan application failed.", exc_info=err)
        raise err
    _notify(NotificationEvent.APPLY_END)
Applies a plan by pushing snapshots and backfilling data.
Given a plan, it pushes snapshots into the state sync and then uses the scheduler to backfill all models.
Arguments:
- plan: The plan to apply.
- circuit_breaker: An optional handler which checks if the apply should be aborted.
@python_api_analytics
def invalidate_environment(self, name: str, sync: bool = False) -> None:
    """Invalidate the target environment by setting its expiration timestamp to now.

    Args:
        name: The name of the environment to invalidate. It is sanitized before use.
        sync: If True, the call blocks until the environment is deleted. Otherwise, the
            environment will be deleted asynchronously by the janitor process.
    """
    name = Environment.sanitize_name(name)
    self.state_sync.invalidate_environment(name)

    if not sync:
        self.console.log_success(f"Environment '{name}' invalidated.")
        return

    # Synchronous mode: run the cleanup right away instead of waiting for the janitor.
    self._cleanup_environments()
    self.console.log_success(f"Environment '{name}' deleted.")
Invalidates the target environment by setting its expiration timestamp to now.
Arguments:
- name: The name of the environment to invalidate.
- sync: If True, the call blocks until the environment is deleted. Otherwise, the environment will be deleted asynchronously by the janitor process.
@python_api_analytics
def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> bool:
    """Show how the current context differs from a given environment.

    Args:
        environment: The environment to diff against. Falls back to the configured
            default target environment when not provided.
        detailed: Show the actual SQL differences if True.

    Returns:
        True if there are changes, False otherwise.
    """
    environment = Environment.sanitize_name(
        environment or self.config.default_target_environment
    )
    context_diff = self._context_diff(environment)
    hide_sql = not detailed

    self.console.show_environment_difference_summary(context_diff, no_diff=hide_sql)

    if context_diff.has_changes:
        naming_info = EnvironmentNamingInfo.from_environment_catalog_mapping(
            self.environment_catalog_mapping,
            name=environment,
            suffix_target=self.config.environment_suffix_target,
            normalize_name=context_diff.normalize_environment_name,
        )
        self.console.show_model_difference_summary(
            context_diff,
            naming_info,
            self.default_catalog,
            no_diff=hide_sql,
        )

    return context_diff.has_changes
Show a diff of the current context with a given environment.
Arguments:
- environment: The environment to diff against.
- detailed: Show the actual SQL differences if True.
Returns:
True if there are changes, False otherwise.
@python_api_analytics
def table_diff(
    self,
    source: str,
    target: str,
    on: t.Optional[t.List[str] | exp.Condition] = None,
    skip_columns: t.Optional[t.List[str]] = None,
    select_models: t.Optional[t.Collection[str]] = None,
    where: t.Optional[str | exp.Condition] = None,
    limit: int = 20,
    show: bool = True,
    show_sample: bool = True,
    decimals: int = 3,
    skip_grain_check: bool = False,
    warn_grain_check: bool = False,
    temp_schema: t.Optional[str] = None,
    schema_diff_ignore_case: bool = False,
    **kwargs: t.Any,  # catch-all to prevent an 'unexpected keyword argument' error if a table_diff extension passes in some extra arguments
) -> t.List[TableDiff]:
    """Show a diff between two tables.

    When `select_models` is given, `source` and `target` are looked up as environment
    names and every selected model that has a snapshot in both environments is diffed,
    provided the snapshot fingerprints differ and either the versions differ or the
    source snapshot is forward-only. Without `select_models`, `source` and `target`
    are passed straight to a single table diff.

    Args:
        source: The source environment or table.
        target: The target environment or table.
        on: The join condition, table aliases must be "s" and "t" for source and target.
            If omitted, the table's grain will be used.
        skip_columns: The columns to skip when computing the table diff.
        select_models: The models or snapshots to use when environments are passed in.
        where: An optional where statement to filter results.
        limit: The limit of the sample dataframe.
        show: Show the table diff output in the console.
        show_sample: Show the sample dataframe in the console. Requires show=True.
        decimals: The number of decimal places to keep when comparing floating point columns.
        skip_grain_check: Skip check for rows that contain null or duplicate grains.
        warn_grain_check: If True, selected models that have neither an `on` override nor
            a join condition of their own are reported with a console warning and left
            out of the diff instead of raising an error.
        temp_schema: The schema to use for temporary tables.
        schema_diff_ignore_case: Forwarded unchanged to the underlying diff helpers;
            presumably makes the schema comparison case-insensitive (confirm against
            those helpers).
        **kwargs: Accepted and ignored, so that extensions can pass extra arguments.

    Returns:
        The list of TableDiff objects containing schema and summary differences.

    Raises:
        ConfigError: If `source` or `target` contains a "|" (cross-database diffing).
        SQLMeshError: If a source/target environment cannot be found, if expanding the
            model selection fails, or if some selected models have no join condition
            and `warn_grain_check` is False.
    """

    if "|" in source or "|" in target:
        raise ConfigError(
            "Cross-database table diffing is available in Tobiko Cloud. Read more here: "
            "https://sqlmesh.readthedocs.io/en/stable/guides/tablediff/#diffing-tables-or-views-across-gateways"
        )

    table_diffs: t.List[TableDiff] = []

    # Diffs multiple or a single model across two environments
    if select_models:
        source_env = self.state_reader.get_environment(source)
        target_env = self.state_reader.get_environment(target)
        if not source_env:
            raise SQLMeshError(f"Could not find environment '{source}'")
        if not target_env:
            raise SQLMeshError(f"Could not find environment '{target}'")
        criteria = ", ".join(f"'{c}'" for c in select_models)
        try:
            selected_models = self._new_selector().expand_model_selections(select_models)
            if not selected_models:
                self.console.log_status_update(
                    f"No models matched the selection criteria: {criteria}"
                )
        except Exception as e:
            raise SQLMeshError(e)

        # Each entry: (model, engine adapter, source table, target table, join condition)
        models_to_diff: t.List[
            t.Tuple[Model, EngineAdapter, str, str, t.Optional[t.List[str] | exp.Condition]]
        ] = []
        models_without_grain: t.List[Model] = []
        source_snapshots_to_name = {
            snapshot.name: snapshot for snapshot in source_env.snapshots
        }
        target_snapshots_to_name = {
            snapshot.name: snapshot for snapshot in target_env.snapshots
        }

        for model_fqn in selected_models:
            model = self._models[model_fqn]
            adapter = self._get_engine_adapter(model.gateway)
            source_snapshot = source_snapshots_to_name.get(model.fqn)
            target_snapshot = target_snapshots_to_name.get(model.fqn)

            # Models missing from either environment are silently skipped.
            if target_snapshot and source_snapshot:
                if (source_snapshot.fingerprint != target_snapshot.fingerprint) and (
                    (source_snapshot.version != target_snapshot.version)
                    or source_snapshot.is_forward_only
                ):
                    # Compare the virtual layer instead of the physical layer because the virtual layer is guaranteed to point
                    # to the correct/active snapshot for the model in the specified environment, taking into account things like dev previews
                    # NOTE: this rebinds the `source` / `target` parameters to view names.
                    source = source_snapshot.qualified_view_name.for_environment(
                        source_env.naming_info, adapter.dialect
                    )
                    target = target_snapshot.qualified_view_name.for_environment(
                        target_env.naming_info, adapter.dialect
                    )
                    # An explicit `on` takes precedence over the model's own join condition.
                    model_on = on or model.on
                    if not model_on:
                        models_without_grain.append(model)
                    else:
                        models_to_diff.append((model, adapter, source, target, model_on))

        if models_without_grain:
            model_names = "\n".join(
                f"─ {model.name} \n at '{model._path}'" for model in models_without_grain
            )
            message = (
                "SQLMesh doesn't know how to join the tables for the following models:\n"
                f"{model_names}\n\n"
                "Please specify a `grain` in each model definition. It must be unique and not null."
            )
            if warn_grain_check:
                self.console.log_warning(message)
            else:
                raise SQLMeshError(message)

        if models_to_diff:
            self.console.show_table_diff_details(
                [model[0].name for model in models_to_diff],
            )

            self.console.start_table_diff_progress(len(models_to_diff))
            try:
                # Never request more workers than there are models to diff.
                tasks_num = min(len(models_to_diff), self.concurrent_tasks)
                table_diffs = concurrent_apply_to_values(
                    list(models_to_diff),
                    lambda model_info: self._model_diff(
                        model=model_info[0],
                        adapter=model_info[1],
                        source=model_info[2],
                        target=model_info[3],
                        on=model_info[4],
                        source_alias=source_env.name,
                        target_alias=target_env.name,
                        limit=limit,
                        decimals=decimals,
                        skip_columns=skip_columns,
                        where=where,
                        show=show,
                        temp_schema=temp_schema,
                        skip_grain_check=skip_grain_check,
                        schema_diff_ignore_case=schema_diff_ignore_case,
                    ),
                    tasks_num=tasks_num,
                )
                self.console.stop_table_diff_progress(success=True)
            except:
                # Stop the progress display on any failure (the bare except also covers
                # KeyboardInterrupt), then re-raise.
                self.console.stop_table_diff_progress(success=False)
                raise
        elif selected_models:
            self.console.log_status_update(
                f"No models contain differences with the selection criteria: {criteria}"
            )

    else:
        # No model selection: diff the two named tables directly on the default adapter.
        table_diffs = [
            self._table_diff(
                source=source,
                target=target,
                source_alias=source,
                target_alias=target,
                limit=limit,
                decimals=decimals,
                adapter=self.engine_adapter,
                on=on,
                skip_columns=skip_columns,
                where=where,
                schema_diff_ignore_case=schema_diff_ignore_case,
            )
        ]

    if show:
        self.console.show_table_diff(table_diffs, show_sample, skip_grain_check, temp_schema)

    return table_diffs
Show a diff between two tables.
Arguments:
- source: The source environment or table.
- target: The target environment or table.
- on: The join condition, table aliases must be "s" and "t" for source and target. If omitted, the table's grain will be used.
- skip_columns: The columns to skip when computing the table diff.
- select_models: The models or snapshots to use when environments are passed in.
- where: An optional where statement to filter results.
- limit: The limit of the sample dataframe.
- show: Show the table diff output in the console.
- show_sample: Show the sample dataframe in the console. Requires show=True.
- decimals: The number of decimal places to keep when comparing floating point columns.
- skip_grain_check: Skip check for rows that contain null or duplicate grains.
- temp_schema: The schema to use for temporary tables.
Returns:
The list of TableDiff objects containing schema and summary differences.
@python_api_analytics
def get_dag(
    self, select_models: t.Optional[t.Collection[str]] = None, **options: t.Any
) -> GraphHTML:
    """Build an HTML object representation of the DAG.

    Args:
        select_models: A list of model selection strings that should be included in the dag.
            When omitted, the whole DAG is used.
        **options: Extra rendering options; they are merged over the defaults, so a key
            given here replaces the default with the same key.

    Returns:
        An html object that renders the dag.
    """
    dag = self.dag
    if select_models:
        dag = dag.prune(*self._new_selector().expand_model_selections(select_models))

    graph = dag.graph

    # One box per node, labelled with the last dot-separated part of its name.
    nodes = {
        name: {
            "id": name,
            "label": name.split(".")[-1],
            "title": f"<span>{name}</span>",
        }
        for name in graph
    }
    # One edge from each dependency to the node that depends on it.
    edges: t.List[t.Dict] = [
        {"from": upstream, "to": name}
        for name, dependencies in graph.items()
        for upstream in dependencies
    ]

    default_options: t.Dict[str, t.Any] = {
        "height": "100%",
        "width": "100%",
        "interaction": {},
        "layout": {
            "hierarchical": {
                "enabled": True,
                "nodeSpacing": 200,
                "sortMethod": "directed",
            },
        },
        "nodes": {
            "shape": "box",
        },
    }

    return GraphHTML(nodes, edges, options={**default_options, **options})
Gets an HTML object representation of the DAG.
Arguments:
- select_models: A list of model selection strings that should be included in the dag.
Returns:
An html object that renders the dag.
@python_api_analytics
def render_dag(self, path: str, select_models: t.Optional[t.Collection[str]] = None) -> None:
    """Render the dag as HTML and save it to a file.

    The output always ends up in a file with a `.html` extension: a missing extension
    is added silently, and any other extension is replaced after logging a warning.

    Args:
        path: filename to save the dag html to
        select_models: A list of model selection strings that should be included in the dag.
    """
    file_path = Path(path)
    suffix = file_path.suffix
    if suffix != ".html":
        if suffix:
            # Use this context's console, like the rest of the context methods do.
            self.console.log_warning(
                f"The extension {suffix} does not designate an html file. A file with a `.html` extension will be created instead."
            )
        path = str(file_path.with_suffix(".html"))

    # Render before opening the file, so that a failure while building the DAG does not
    # leave an empty file behind or truncate an existing one.
    html = str(self.get_dag(select_models))

    with open(path, "w", encoding="utf-8") as file:
        file.write(html)
Render the dag as HTML and save it to a file.
Arguments:
- path: filename to save the dag html to
- select_models: A list of model selection strings that should be included in the dag.
@python_api_analytics
def create_test(
    self,
    model: str,
    input_queries: t.Dict[str, str],
    overwrite: bool = False,
    variables: t.Optional[t.Dict[str, str]] = None,
    path: t.Optional[str] = None,
    name: t.Optional[str] = None,
    include_ctes: bool = False,
) -> None:
    """Generate a unit test fixture for a given model.

    Args:
        model: The model to test.
        input_queries: Mapping of model names to queries. Each model included in this mapping
            will be populated in the test based on the results of the corresponding query.
        overwrite: Whether to overwrite the existing test in case of a file path collision.
            When set to False, an error will be raised if there is such a collision.
        variables: Key-value pairs that will define variables needed by the model.
        path: The file path corresponding to the fixture, relative to the test directory.
            By default, the fixture will be created under the test directory and the file name
            will be inferred from the test's name.
        name: The name of the test. This is inferred from the model name by default.
        include_ctes: When true, CTE fixtures will also be generated.
    """
    input_queries = {
        # The get_model here has two purposes: return normalized names & check for missing deps
        self.get_model(dep, raise_if_missing=True).fqn: query
        for dep, query in input_queries.items()
    }

    # Bound before the `try` so the `finally` clause can always reference it. Otherwise a
    # failure while resolving the model or creating the adapter would surface as an
    # UnboundLocalError and hide the real error.
    test_adapter = None
    try:
        model_to_test = self.get_model(model, raise_if_missing=True)
        test_adapter = self.test_connection_config.create_engine_adapter(
            register_comments_override=False
        )

        generate_test(
            model=model_to_test,
            input_queries=input_queries,
            models=self._models,
            engine_adapter=self._get_engine_adapter(model_to_test.gateway),
            test_engine_adapter=test_adapter,
            project_path=self.path,
            overwrite=overwrite,
            variables=variables,
            path=path,
            name=name,
            include_ctes=include_ctes,
        )
    finally:
        # Release the dedicated test connection whether or not generation succeeded.
        if test_adapter is not None:
            test_adapter.close()
Generate a unit test fixture for a given model.
Arguments:
- model: The model to test.
- input_queries: Mapping of model names to queries. Each model included in this mapping will be populated in the test based on the results of the corresponding query.
- overwrite: Whether to overwrite the existing test in case of a file path collision. When set to False, an error will be raised if there is such a collision.
- variables: Key-value pairs that will define variables needed by the model.
- path: The file path corresponding to the fixture, relative to the test directory. By default, the fixture will be created under the test directory and the file name will be inferred from the test's name.
- name: The name of the test. This is inferred from the model name by default.
- include_ctes: When true, CTE fixtures will also be generated.
@python_api_analytics
def test(
    self,
    match_patterns: t.Optional[t.List[str]] = None,
    tests: t.Optional[t.List[str]] = None,
    verbosity: Verbosity = Verbosity.DEFAULT,
    preserve_fixtures: bool = False,
    stream: t.Optional[t.TextIO] = None,
) -> ModelTextTestResult:
    """Discover and run model tests.

    Args:
        match_patterns: Patterns forwarded to the test selection step.
        tests: Tests forwarded to the test selection step.
        verbosity: The verbosity level. At `VERBOSE` or above, pandas is configured
            to display all columns.
        preserve_fixtures: Forwarded to the test runner.
        stream: Forwarded to the test runner.

    Returns:
        The result object produced by the test runner; it is also logged to the console.
    """
    if verbosity >= Verbosity.VERBOSE:
        import pandas as pd

        # Lift pandas' column display limit for verbose runs.
        pd.set_option("display.max_columns", None)

    selected_tests = self.select_tests(tests=tests, patterns=match_patterns)
    outcome = run_tests(
        model_test_metadata=selected_tests,
        models=self._models,
        config=self.config,
        selected_gateway=self.selected_gateway,
        dialect=self.default_dialect,
        verbosity=verbosity,
        preserve_fixtures=preserve_fixtures,
        stream=stream,
        default_catalog=self.default_catalog,
        default_catalog_dialect=self.config.dialect or "",
    )

    test_dialect = self.test_connection_config._engine_adapter.DIALECT
    self.console.log_test_results(outcome, test_dialect)

    return outcome
Discover and run model tests
@python_api_analytics
def audit(
    self,
    start: TimeLike,
    end: TimeLike,
    *,
    models: t.Optional[t.Iterator[str]] = None,
    execution_time: t.Optional[TimeLike] = None,
) -> bool:
    """Audit models.

    Every audit outcome (skipped / failed / passed) is logged to the console, followed
    by a summary and, for each failed audit, its result count and query (when available).

    Args:
        start: The start of the interval to audit.
        end: The end of the interval to audit.
        models: The models to audit. All models will be audited if not specified.
        execution_time: The date/time reference to use for execution time. Defaults to now.

    Returns:
        False if any of the audits failed, True otherwise.

    Raises:
        Whatever `get_snapshot` raises when one of the requested models is missing
        (it is called with `raise_if_missing=True`).
    """
    selected = (
        [self.get_snapshot(model, raise_if_missing=True) for model in models]
        if models
        else None
    )

    # Read the context's snapshot mapping once and reuse it for every audited snapshot,
    # instead of re-reading it on each loop iteration.
    all_snapshots = self.snapshots
    snapshots = selected if selected is not None else all_snapshots.values()

    num_audits = sum(len(snapshot.node.audits_with_args) for snapshot in snapshots)
    self.console.log_status_update(f"Found {num_audits} audit(s).")

    errors = []
    skipped_count = 0
    for snapshot in snapshots:
        for audit_result in self.snapshot_evaluator.audit(
            snapshot=snapshot,
            start=start,
            end=end,
            execution_time=execution_time,
            snapshots=all_snapshots,
        ):
            audit_id = f"{audit_result.audit.name}"
            if audit_result.model:
                audit_id += f" on model {audit_result.model.name}"

            if audit_result.skipped:
                self.console.log_status_update(f"{audit_id} ⏸️ SKIPPED.")
                skipped_count += 1
            elif audit_result.count:
                # A non-zero result count means the audit failed.
                errors.append(audit_result)
                self.console.log_status_update(
                    f"{audit_id} ❌ [red]FAIL [{audit_result.count}][/red]."
                )
            else:
                self.console.log_status_update(f"{audit_id} ✅ [green]PASS[/green].")

    self.console.log_status_update(
        f"\nFinished with {len(errors)} audit error{'' if len(errors) == 1 else 's'} "
        f"and {skipped_count} audit{'' if skipped_count == 1 else 's'} skipped."
    )
    for error in errors:
        self.console.log_status_update(
            f"\nFailure in audit {error.audit.name} ({error.audit._path})."
        )
        self.console.log_status_update(f"Got {error.count} results, expected 0.")
        if error.query:
            self.console.show_sql(
                f"{error.query.sql(dialect=self.snapshot_evaluator.adapter.dialect)}"
            )

    self.console.log_status_update("Done.")
    return not errors
Audit models.
Arguments:
- start: The start of the interval to audit.
- end: The end of the interval to audit.
- models: The models to audit. All models will be audited if not specified.
- execution_time: The date/time reference to use for execution time. Defaults to now.
Returns:
False if any of the audits failed, True otherwise.
@python_api_analytics
def rewrite(self, sql: str, dialect: str = "") -> exp.Expression:
    """Expand the semantic references of a sql expression into an executable query.

    https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/

    Args:
        sql: The sql string to rewrite.
        dialect: The dialect of the sql string, defaults to the project dialect.

    Returns:
        A SQLGlot expression with semantic references expanded.
    """
    reference_graph = ReferenceGraph(self.models.values())
    return rewrite(
        sql,
        graph=reference_graph,
        metrics=self._metrics,
        dialect=dialect or self.default_dialect,
    )
Rewrite a sql expression with semantic references into an executable query.
https://sqlmesh.readthedocs.io/en/latest/concepts/metrics/overview/
Arguments:
- sql: The sql string to rewrite.
- dialect: The dialect of the sql string, defaults to the project dialect.
Returns:
A SQLGlot expression with semantic references expanded.
@python_api_analytics
def check_intervals(
    self,
    environment: t.Optional[str],
    no_signals: bool,
    select_models: t.Collection[str],
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
) -> t.Dict[Snapshot, SnapshotIntervals]:
    """Check intervals for a given environment.

    Args:
        environment: The environment or prod if None.
        no_signals: If True, missing intervals are reported as-is. Otherwise they are
            first passed through each snapshot's `check_ready_intervals`.
        select_models: A list of model selection strings to show intervals for.
            All snapshots of the environment are used when empty.
        start: The start of the intervals to check.
        end: The end of the intervals to check. Also used as the execution time.

    Returns:
        A mapping from each selected snapshot to its intervals.

    Raises:
        SQLMeshError: If the environment does not exist.
    """
    environment = environment or c.PROD
    env = self.state_reader.get_environment(environment)
    if not env:
        raise SQLMeshError(f"Environment '{environment}' was not found.")

    # Both lookups below are keyed by snapshot name.
    snapshots = {
        snapshot_id.name: snapshot
        for snapshot_id, snapshot in self.state_sync.get_snapshots(env.snapshots).items()
    }
    missing = {
        snapshot_id.name: intervals
        for snapshot_id, intervals in missing_intervals(
            snapshots.values(), start=start, end=end, execution_time=end
        ).items()
    }

    selected: t.Collection[str] = (
        self._select_models_for_run(select_models, True, snapshots.values())
        if select_models
        else snapshots.keys()
    )

    execution_context = self.execution_context(snapshots=snapshots)
    results = {}

    for fqn in selected:
        snapshot = snapshots[fqn]
        intervals = missing.get(fqn) or []
        if not no_signals:
            intervals = snapshot.check_ready_intervals(intervals, execution_context)
        results[snapshot] = SnapshotIntervals(snapshot.snapshot_id, intervals)

    return results
Check intervals for a given environment.
Arguments:
- environment: The environment or prod if None.
- select_models: A list of model selection strings to show intervals for.
- start: The start of the intervals to check.
- end: The end of the intervals to check.
@python_api_analytics
def migrate(self) -> None:
    """Migrate SQLMesh to the current running version.

    Notification targets are told when the migration starts and when it ends or fails.

    Please contact your SQLMesh administrator before doing this.
    """
    self.notification_target_manager.notify(NotificationEvent.MIGRATION_START)
    self._load_materializations()

    promoted_only = self.config.migration.promoted_snapshots_only
    try:
        self._new_state_sync().migrate(promoted_snapshots_only=promoted_only)
    except Exception as err:
        # Report the failure with its traceback, then let the error propagate.
        self.notification_target_manager.notify(
            NotificationEvent.MIGRATION_FAILURE, traceback.format_exc()
        )
        raise err
    else:
        self.notification_target_manager.notify(NotificationEvent.MIGRATION_END)
Migrates SQLMesh to the current running version.
Please contact your SQLMesh administrator before doing this.
@python_api_analytics
def rollback(self) -> None:
    """Roll SQLMesh back to the previous migration.

    Please contact your SQLMesh administrator before doing this. This action cannot be undone.
    """
    # A new state sync is created for the rollback.
    state_sync = self._new_state_sync()
    state_sync.rollback()
Rolls back SQLMesh to the previous migration.
Please contact your SQLMesh administrator before doing this. This action cannot be undone.
@python_api_analytics
def create_external_models(self, strict: bool = False) -> None:
    """Create a file to document the schema of external models.

    The external models file contains all columns and types of external models, allowing for more
    robust lineage, validation, and optimizations. One file is written per loaded project
    config; a project that still has the deprecated file keeps using that file.

    Args:
        strict: If True, raise an error if the external model is missing in the database.
    """
    if not self._models:
        self.load(update_schemas=False)

    for project_path, project_config in self.configs.items():
        legacy_file = project_path / c.EXTERNAL_MODELS_DEPRECATED_YAML
        output_file = (
            legacy_file if legacy_file.exists() else project_path / c.EXTERNAL_MODELS_YAML
        )

        # An empty gateway can happen if there was no --gateway defined and the
        # default_gateway is ''. That means the single gateway syntax is being used, there
        # is no named gateway, and `gateway:` should not be stamped on the external models.
        gateway_name: t.Optional[str] = (
            self.gateway or self.config.default_gateway or None
        )

        # Only the models that belong to this project's config.
        project_models = {
            fqn: model
            for fqn, model in self._models.items()
            if self.config_for_node(model) is project_config
        }

        create_external_models_file(
            path=output_file,
            models=UniqueKeyDict("models", project_models),
            adapter=self.engine_adapter,
            state_reader=self.state_reader,
            dialect=project_config.model_defaults.dialect,
            gateway=gateway_name,
            max_workers=self.concurrent_tasks,
            strict=strict,
        )
Create a file to document the schema of external models.
The external models file contains all columns and types of external models, allowing for more robust lineage, validation, and optimizations.
Arguments:
- strict: If True, raise an error if the external model is missing in the database.
@python_api_analytics
def print_info(
    self, skip_connection: bool = False, verbosity: Verbosity = Verbosity.DEFAULT
) -> None:
    """Print information about connections, models, macros, etc. to the console.

    Args:
        skip_connection: If True, only the model and macro counts are printed; no
            connection configuration is shown and no connection is attempted.
        verbosity: At `VERBOSE` or above, the connection, test connection and state
            connection configurations are printed as well.
    """
    console = self.console
    console.log_status_update(f"Models: {len(self.models)}")
    # Macros from the macro registry are not counted.
    console.log_status_update(f"Macros: {len(self._macros) - len(macro.get_registry())}")

    if skip_connection:
        return

    if verbosity >= Verbosity.VERBOSE:
        self.console.log_status_update("")
        config_sections = (
            (self.config.get_connection, "Connection"),
            (self.config.get_test_connection, "Test Connection"),
            (self.config.get_state_connection, "State Connection"),
        )
        for fetch_connection, title in config_sections:
            print_config(fetch_connection(self.gateway), self.console, title)

    self._try_connection("data warehouse", self.engine_adapter.ping)
    state_connection = self.config.get_state_connection(self.gateway)
    if state_connection:
        self._try_connection("state backend", state_connection.connection_validator())
Prints information about connections, models, macros, etc. to the console.
@python_api_analytics
def print_environment_names(self) -> None:
    """Print all environment names along with expiry datetime.

    Raises:
        SQLMeshError: If the project has no environments.
    """
    summaries = self._new_state_sync().get_environments_summary()
    if summaries:
        self.console.print_environments(summaries)
        return

    raise SQLMeshError(
        "This project has no environments. Create an environment using the `sqlmesh plan` command."
    )
Prints all environment names along with expiry datetime.
def close(self) -> None:
    """Release all resources allocated by this context.

    Closes the snapshot evaluator and then the state sync, skipping whichever of
    them is not set.
    """
    # Attributes are read one at a time, in order, right before they are closed.
    for attribute in ("_snapshot_evaluator", "_state_sync"):
        resource = getattr(self, attribute)
        if resource:
            resource.close()
Releases all resources allocated by this context.
@python_api_analytics
def table_name(
    self, model_name: str, environment: t.Optional[str] = None, prod: bool = False
) -> str:
    """Returns the name of the physical table for the given model name in the target environment.

    Args:
        model_name: The name of the model.
        environment: The environment to source the model version from. Falls back to the
            config's default target environment when not provided.
        prod: If True, return the name of the physical table that will be used in production
            for the model version promoted in the target environment.

    Returns:
        The name of the physical table.

    Raises:
        SQLMeshError: If the environment is not found, or the model is not part of it.
    """
    environment = environment or self.config.default_target_environment
    fqn = self._node_or_snapshot_to_fqn(model_name)

    target_env = self.state_reader.get_environment(environment)
    if not target_env:
        raise SQLMeshError(f"Environment '{environment}' was not found.")

    # First snapshot in the environment whose name matches the fully qualified model name.
    snapshot_info = next(
        (candidate for candidate in target_env.snapshots if candidate.name == fqn), None
    )
    if not snapshot_info:
        raise SQLMeshError(f"Model '{model_name}' was not found in environment '{environment}'.")

    wants_prod_table = target_env.name == c.PROD or prod
    if not wants_prod_table:
        # Outside of prod the table name depends on whether the snapshot is deployable.
        env_snapshots = self.state_reader.get_snapshots(target_env.snapshots)
        deployability = DeployabilityIndex.create(env_snapshots)
        return snapshot_info.table_name(
            is_deployable=deployability.is_deployable(snapshot_info.snapshot_id)
        )

    return snapshot_info.table_name()
Returns the name of the physical table for the given model name in the target environment.
Arguments:
- model_name: The name of the model.
- environment: The environment to source the model version from.
- prod: If True, return the name of the physical table that will be used in production for the model version promoted in the target environment.
Returns:
The name of the physical table.
def clear_caches(self) -> None:
    """Removes the cache directories from disk and clears the state sync cache.

    The directories are `<key> / c.CACHE` for every key of `self.configs`, plus
    `self.cache_dir`. Directories that do not exist are skipped. Afterwards
    `clear_cache()` is called on the state sync if it is a `CachingStateSync`.
    """
    cache_dirs = [*(config_path / c.CACHE for config_path in self.configs), self.cache_dir]

    if IS_WINDOWS:
        cache_dirs = list(map(fix_windows_path, cache_dirs))

    for cache_dir in cache_dirs:
        if cache_dir.exists():
            rmtree(cache_dir)

    state_sync = self._state_sync
    if isinstance(state_sync, CachingStateSync):
        state_sync.clear_cache()
def export_state(
    self,
    output_file: Path,
    environment_names: t.Optional[t.List[str]] = None,
    local_only: bool = False,
    confirm: bool = True,
) -> None:
    """Exports state to a file, reporting start and outcome through the console.

    Args:
        output_file: The file handed to the exporter as its output.
        environment_names: Forwarded to the console and the exporter.
            NOTE(review): presumably restricts the export to these environments - confirm.
        local_only: If True, the context's local snapshots are passed to the exporter;
            otherwise None is passed.
        confirm: Forwarded to `console.start_state_export`.
    """
    from sqlmesh.core.state_sync.export_import import export_state

    # trigger a connection to the StateSync so we can fail early if there is a problem
    # note we still need to do this even if we are doing a local export so we know what 'versions' to write
    self.state_sync.get_versions(validate=True)

    local_snapshots = self.snapshots if local_only else None

    should_export = self.console.start_state_export(
        output_file=output_file,
        gateway=self.selected_gateway,
        state_connection_config=self._state_connection_config,
        environment_names=environment_names,
        local_only=local_only,
        confirm=confirm,
    )
    if not should_export:
        return

    try:
        export_state(
            state_sync=self.state_sync,
            output_file=output_file,
            local_snapshots=local_snapshots,
            environment_names=environment_names,
            console=self.console,
        )
        self.console.stop_state_export(success=True, output_file=output_file)
    except BaseException:
        # Catch everything (interrupts included) so the console is always told about
        # the failure; the original error is re-raised untouched.
        self.console.stop_state_export(success=False, output_file=output_file)
        raise
def import_state(self, input_file: Path, clear: bool = False, confirm: bool = True) -> None:
    """Imports state from a file, reporting start and outcome through the console.

    Args:
        input_file: The file handed to the importer as its input.
        clear: Forwarded to the console and the importer.
            NOTE(review): presumably clears existing state before importing - confirm.
        confirm: Forwarded to `console.start_state_import`.
    """
    from sqlmesh.core.state_sync.export_import import import_state

    should_import = self.console.start_state_import(
        input_file=input_file,
        gateway=self.selected_gateway,
        state_connection_config=self._state_connection_config,
        clear=clear,
        confirm=confirm,
    )
    if not should_import:
        return

    try:
        import_state(
            state_sync=self.state_sync,
            input_file=input_file,
            clear=clear,
            console=self.console,
        )
        self.console.stop_state_import(success=True, input_file=input_file)
    except BaseException:
        # Catch everything (interrupts included) so the console is always told about
        # the failure; the original error is re-raised untouched.
        self.console.stop_state_import(success=False, input_file=input_file)
        raise
@cached_property
def cache_dir(self) -> Path:
    """Returns the directory used for caching.

    An absolute `config.cache_dir` is returned as is and a relative one is resolved
    against the project path. When it is not set, the `c.CACHE` directory under the
    project path is used.
    """
    if not self.config.cache_dir:
        # Default to .cache directory in the project path
        return self.path / c.CACHE

    configured = Path(self.config.cache_dir)
    return configured if configured.is_absolute() else self.path / configured
@cached_property
def engine_adapters(self) -> t.Dict[str, EngineAdapter]:
    """Returns all the engine adapters for the gateways defined in the configurations.

    The selected gateway always maps to the context's own engine adapter. Every other
    gateway gets an adapter created from its connection config, using the first
    configuration in which that gateway name appears.
    """
    by_gateway: t.Dict[str, EngineAdapter] = {self.selected_gateway: self.engine_adapter}

    for project_config in self.configs.values():
        for name in project_config.gateways:
            if name in by_gateway:
                continue
            by_gateway[name] = project_config.get_connection(name).create_engine_adapter(
                concurrent_tasks=self.concurrent_tasks,
            )

    return by_gateway
Returns all the engine adapters for the gateways defined in the configurations.
@cached_property
def default_catalog_per_gateway(self) -> t.Dict[str, str]:
    """Returns the default catalogs for each engine adapter.

    The lookup is delegated to `self._scheduler`, which receives this context.
    """
    scheduler = self._scheduler
    catalogs = scheduler.get_default_catalog_per_gateway(self)
    return catalogs
Returns the default catalogs for each engine adapter.
@cached_property
def environment_catalog_mapping(self) -> RegexKeyDict:
    """Returns the configured environment catalog mapping after checking engine support.

    Returns:
        The `environment_catalog_mapping` value from the config.

    Raises:
        SQLMeshError: If a mapping is configured and the engine adapter could be
            obtained, but it does not support multiple catalogs.
    """
    try:
        engine_adapter = self.engine_adapter
    except Exception:
        # Best effort: if the adapter cannot be obtained the support check is skipped
        # rather than failing the lookup of the mapping itself.
        engine_adapter = None

    mapping = self.config.environment_catalog_mapping
    if (
        mapping
        and engine_adapter
        # Use the adapter fetched above. Reading `self.engine_adapter` again here would
        # happen outside the guard and could raise the very error tolerated above.
        and not engine_adapter.catalog_support.is_multi_catalog_supported
    ):
        raise SQLMeshError(
            "Environment catalog mapping is only supported for engine adapters that support multiple catalogs"
        )
    return mapping
def lint_models(
    self,
    models: t.Optional[t.Iterable[t.Union[str, Model]]] = None,
    raise_on_error: bool = True,
) -> t.List[AnnotatedRuleViolation]:
    """Lints models with their project's linter and collects the reported violations.

    Args:
        models: The models (or model names) to lint; each is resolved through `get_model`
            with `raise_if_missing=True`. When omitted or empty, all loaded models are linted.
        raise_on_error: Whether to raise once linting finished if any linter reported an error.

    Returns:
        The violations reported for all linted models, in the order the models were linted.

    Raises:
        LinterError: If `raise_on_error` is True and at least one linter reported an error.
    """
    model_list = (
        [self.get_model(model, raise_if_missing=True) for model in models]
        if models
        else self.models.values()
    )

    found_error = False
    all_violations: t.List[AnnotatedRuleViolation] = []
    for model in model_list:
        # Linter may be `None` if the context is not loaded yet
        linter = self._linters.get(model.project)
        if not linter:
            continue

        # `lint_model` yields an (error flag, violations) pair. The pair is unpacked
        # directly: falling back to a bool here could never be unpacked anyway.
        has_error, violations = linter.lint_model(model, self, console=self.console)
        if has_error:
            found_error = True
        all_violations.extend(violations)

    if raise_on_error and found_error:
        raise LinterError(
            "Linter detected errors in the code. Please fix them before proceeding."
        )

    return all_violations
def select_tests(
    self,
    tests: t.Optional[t.List[str]] = None,
    patterns: t.Optional[t.List[str]] = None,
) -> t.List[ModelTestMetadata]:
    """Filter pre-loaded test metadata based on tests and patterns.

    Args:
        tests: Test selectors. A selector containing "::" is looked up in the fully
            qualified name index; any other selector is treated as a file path and looked
            up in the path index. Selectors missing from their index are ignored.
        patterns: When given, the selection is further narrowed with `filter_tests_by_patterns`.

    Returns:
        The selected test metadata. Without `tests` and `patterns` this is the pre-loaded
        metadata list itself.
    """
    selected = self._model_test_metadata

    if tests:
        by_name = self._model_test_metadata_fully_qualified_name_index
        by_path = self._model_test_metadata_path_index

        selected = []
        for selector in tests:
            if "::" not in selector:
                path_key = Path(selector)
                if path_key in by_path:
                    selected.extend(by_path[path_key])
            elif selector in by_name:
                selected.append(by_name[selector])

    if patterns:
        selected = filter_tests_by_patterns(selected, patterns)

    return selected
Filter pre-loaded test metadata based on tests and patterns.
Encapsulates a SQLMesh environment supplying convenient functions to perform various tasks.
Arguments:
- notification_targets: The notification target to use. Defaults to what is defined in config.
- paths: The directories containing SQLMesh files.
- config: A Config object or the name of a Config object in config.py.
- connection: The name of the connection. If not specified the first connection as it appears in configuration will be used.
- test_connection: The name of the connection to use for tests. If not specified the first connection as it appears in configuration will be used.
- concurrent_tasks: The maximum number of tasks that can use the connection concurrently.
- load: Whether or not to automatically load all models and macros (default True).
- console: The rich instance used for printing out CLI command results.
- users: A list of users to make known to SQLMesh.
The type of config object to use (default: Config).
Inherited Members
- GenericContext
- GenericContext
- PLAN_BUILDER_TYPE
- configs
- dag
- gateway
- environment_ttl
- pinned_environments
- auto_categorize_changes
- selected_gateway
- console
- notification_targets
- users
- default_dialect
- engine_adapter
- snapshot_evaluator
- execution_context
- upsert_model
- scheduler
- create_scheduler
- state_sync
- state_reader
- refresh
- load
- run
- run_janitor
- destroy
- get_model
- get_snapshot
- config_for_path
- config_for_node
- models
- metrics
- standalone_audits
- models_with_tests
- snapshots
- requirements
- default_catalog
- render
- evaluate
- format
- plan
- plan_builder
- apply
- invalidate_environment
- diff
- table_diff
- get_dag
- render_dag
- create_test
- test
- audit
- rewrite
- check_intervals
- migrate
- rollback
- create_external_models
- print_info
- print_environment_names
- close
- table_name
- clear_caches
- export_state
- import_state
- cache_dir
- engine_adapters
- default_catalog_per_gateway
- concurrent_tasks
- connection_config
- test_connection_config
- environment_catalog_mapping
- lint_models
- select_tests