SnapshotEvaluator
A snapshot evaluator is responsible for evaluating a snapshot given some runtime arguments, e.g. start and end timestamps.
Evaluation
Snapshot evaluation involves determining the queries necessary to evaluate a snapshot and using
sqlmesh.core.engine_adapter to execute the queries. Schemas, tables, and views are created if
they don't exist and data is inserted when applicable.
A snapshot evaluator also promotes and demotes snapshots to a given environment.
Audits
A snapshot evaluator can also run the audits for a snapshot's node. This is often done after a snapshot has been evaluated to check for data quality issues.
For more information about audits, see sqlmesh.core.audit.
1""" 2# SnapshotEvaluator 3 4A snapshot evaluator is responsible for evaluating a snapshot given some runtime arguments, e.g. start 5and end timestamps. 6 7# Evaluation 8 9Snapshot evaluation involves determining the queries necessary to evaluate a snapshot and using 10`sqlmesh.core.engine_adapter` to execute the queries. Schemas, tables, and views are created if 11they don't exist and data is inserted when applicable. 12 13A snapshot evaluator also promotes and demotes snapshots to a given environment. 14 15# Audits 16 17A snapshot evaluator can also run the audits for a snapshot's node. This is often done after a snapshot 18has been evaluated to check for data quality issues. 19 20For more information about audits, see `sqlmesh.core.audit`. 21""" 22 23from __future__ import annotations 24 25import abc 26import logging 27import typing as t 28import sys 29from collections import defaultdict 30from contextlib import contextmanager 31from functools import reduce 32 33from sqlglot import exp, select 34from sqlglot.executor import execute 35from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_not_exception_type 36 37from sqlmesh.core import constants as c 38from sqlmesh.core import dialect as d 39from sqlmesh.core.audit import Audit, StandaloneAudit 40from sqlmesh.core.dialect import schema_ 41from sqlmesh.core.engine_adapter.shared import InsertOverwriteStrategy, DataObjectType, DataObject 42from sqlmesh.core.model.meta import GrantsTargetLayer 43from sqlmesh.core.macros import RuntimeStage 44from sqlmesh.core.model import ( 45 AuditResult, 46 IncrementalUnmanagedKind, 47 Model, 48 SeedModel, 49 SCDType2ByColumnKind, 50 SCDType2ByTimeKind, 51 ViewKind, 52 CustomKind, 53) 54from sqlmesh.core.model.kind import _Incremental, DbtCustomKind 55from sqlmesh.utils import CompletionStatus, columns_to_types_all_known 56from sqlmesh.core.schema_diff import ( 57 has_drop_alteration, 58 TableAlterOperation, 59 has_additive_alteration, 60) 61from sqlmesh.core.snapshot import ( 62 DeployabilityIndex, 63 Intervals, 64 Snapshot, 65 SnapshotId, 66 SnapshotIdBatch, 67 SnapshotInfoLike, 68 SnapshotTableCleanupTask, 69) 70from sqlmesh.core.snapshot.execution_tracker import QueryExecutionTracker 71from sqlmesh.utils import random_id, CorrelationId, AttributeDict 72from sqlmesh.utils.concurrency import ( 73 concurrent_apply_to_snapshots, 74 concurrent_apply_to_values, 75 NodeExecutionFailedError, 76) 77from sqlmesh.utils.date import TimeLike, now, time_like_to_str 78from sqlmesh.utils.errors import ( 79 ConfigError, 80 DestructiveChangeError, 81 MigrationNotSupportedError, 82 SQLMeshError, 83 format_destructive_change_msg, 84 format_additive_change_msg, 85 AdditiveChangeError, 86) 87from sqlmesh.utils.jinja import MacroReturnVal 88 89if sys.version_info >= (3, 12): 90 from importlib import metadata 91else: 92 import importlib_metadata as metadata # type: ignore 93 94if t.TYPE_CHECKING: 95 from sqlmesh.core.engine_adapter._typing import DF, QueryOrDF 96 from sqlmesh.core.engine_adapter.base import EngineAdapter 97 from sqlmesh.core.environment import EnvironmentNamingInfo 98 99logger = logging.getLogger(__name__) 100 101 102class SnapshotCreationFailedError(SQLMeshError): 103 def __init__( 104 self, errors: t.List[NodeExecutionFailedError[SnapshotId]], skipped: t.List[SnapshotId] 105 ): 106 messages = "\n\n".join(f"{error}\n {error.__cause__}" for error in errors) 107 super().__init__(f"Physical table creation failed:\n\n{messages}") 108 self.errors = errors 109 self.skipped = skipped 110 111 112class SnapshotEvaluator: 113 """Evaluates a snapshot given runtime arguments through an arbitrary EngineAdapter. 114 115 The SnapshotEvaluator contains the business logic to generically evaluate a snapshot. 116 It is responsible for delegating queries to the EngineAdapter. The SnapshotEvaluator 117 does not directly communicate with the underlying execution engine. 118 119 Args: 120 adapters: A single EngineAdapter or a dictionary of EngineAdapters where 121 the key is the gateway name. When a dictionary is provided, and not an 122 explicit default gateway its first item is treated as the default 123 adapter and used for the virtual layer. 124 ddl_concurrent_tasks: The number of concurrent tasks used for DDL 125 operations (table / view creation, deletion, etc). Default: 1. 126 """ 127 128 def __init__( 129 self, 130 adapters: EngineAdapter | t.Dict[str, EngineAdapter], 131 ddl_concurrent_tasks: int = 1, 132 selected_gateway: t.Optional[str] = None, 133 ): 134 self.adapters = ( 135 adapters if isinstance(adapters, t.Dict) else {selected_gateway or "": adapters} 136 ) 137 self.execution_tracker = QueryExecutionTracker() 138 self.adapters = { 139 gateway: adapter.with_settings(query_execution_tracker=self.execution_tracker) 140 for gateway, adapter in self.adapters.items() 141 } 142 self.adapter = ( 143 next(iter(self.adapters.values())) 144 if not selected_gateway 145 else self.adapters[selected_gateway] 146 ) 147 self.selected_gateway = selected_gateway 148 self.ddl_concurrent_tasks = ddl_concurrent_tasks 149 150 def evaluate( 151 self, 152 snapshot: Snapshot, 153 *, 154 start: TimeLike, 155 end: TimeLike, 156 execution_time: TimeLike, 157 snapshots: t.Dict[str, Snapshot], 158 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 159 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 160 deployability_index: t.Optional[DeployabilityIndex] = None, 161 batch_index: int = 0, 162 target_table_exists: t.Optional[bool] = None, 163 **kwargs: t.Any, 164 ) -> t.Optional[str]: 165 """Renders the snapshot's model, executes it and stores the result in the snapshot's physical table. 166 167 Args: 168 snapshot: Snapshot to evaluate. 169 start: The start datetime to render. 170 end: The end datetime to render. 171 execution_time: The date/time time reference to use for execution time. 172 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 173 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 174 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 175 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 176 batch_index: If the snapshot is part of a batch of related snapshots; which index in the batch is it 177 target_table_exists: Whether the target table exists. If None, the table will be checked for existence. 178 kwargs: Additional kwargs to pass to the renderer. 179 180 Returns: 181 The WAP ID of this evaluation if supported, None otherwise. 182 """ 183 with self.execution_tracker.track_execution( 184 SnapshotIdBatch(snapshot_id=snapshot.snapshot_id, batch_id=batch_index) 185 ): 186 result = self._evaluate_snapshot( 187 start=start, 188 end=end, 189 execution_time=execution_time, 190 snapshot=snapshot, 191 snapshots=snapshots, 192 allow_destructive_snapshots=allow_destructive_snapshots or set(), 193 allow_additive_snapshots=allow_additive_snapshots or set(), 194 deployability_index=deployability_index, 195 batch_index=batch_index, 196 target_table_exists=target_table_exists, 197 **kwargs, 198 ) 199 if result is None or isinstance(result, str): 200 return result 201 raise SQLMeshError( 202 f"Unexpected result {result} when evaluating snapshot {snapshot.snapshot_id}." 203 ) 204 205 def evaluate_and_fetch( 206 self, 207 snapshot: Snapshot, 208 *, 209 start: TimeLike, 210 end: TimeLike, 211 execution_time: TimeLike, 212 snapshots: t.Dict[str, Snapshot], 213 limit: int, 214 deployability_index: t.Optional[DeployabilityIndex] = None, 215 **kwargs: t.Any, 216 ) -> DF: 217 """Renders the snapshot's model, executes it and returns a dataframe with the result. 218 219 Args: 220 snapshot: Snapshot to evaluate. 221 start: The start datetime to render. 222 end: The end datetime to render. 223 execution_time: The date/time time reference to use for execution time. 224 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 225 limit: The maximum number of rows to fetch. 226 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 227 kwargs: Additional kwargs to pass to the renderer. 228 229 Returns: 230 The result of the evaluation as a dataframe. 231 """ 232 import pandas as pd 233 234 adapter = self.get_adapter(snapshot.model.gateway) 235 render_kwargs = dict( 236 start=start, 237 end=end, 238 execution_time=execution_time, 239 snapshot=snapshot, 240 runtime_stage=RuntimeStage.EVALUATING, 241 **kwargs, 242 ) 243 queries_or_dfs = self._render_snapshot_for_evaluation( 244 snapshot, 245 snapshots, 246 deployability_index or DeployabilityIndex.all_deployable(), 247 render_kwargs, 248 ) 249 query_or_df = next(queries_or_dfs) 250 if isinstance(query_or_df, pd.DataFrame): 251 return query_or_df.head(limit) 252 if not isinstance(query_or_df, exp.Expr): 253 # We assume that if this branch is reached, `query_or_df` is a pyspark / snowpark / bigframe dataframe, 254 # so we use `limit` instead of `head` to get back a dataframe instead of List[Row] 255 # https://spark.apache.org/docs/3.1.1/api/python/reference/api/pyspark.sql.DataFrame.head.html#pyspark.sql.DataFrame.head 256 return query_or_df.limit(limit) 257 258 assert isinstance(query_or_df, exp.Query) 259 260 existing_limit = query_or_df.args.get("limit") 261 if existing_limit: 262 limit = min(limit, execute(exp.select(existing_limit.expression)).rows[0][0]) 263 assert limit is not None 264 265 return adapter._fetch_native_df(query_or_df.limit(limit)) 266 267 def promote( 268 self, 269 target_snapshots: t.Iterable[Snapshot], 270 environment_naming_info: EnvironmentNamingInfo, 271 deployability_index: t.Optional[DeployabilityIndex] = None, 272 start: t.Optional[TimeLike] = None, 273 end: t.Optional[TimeLike] = None, 274 execution_time: t.Optional[TimeLike] = None, 275 snapshots: t.Optional[t.Dict[SnapshotId, Snapshot]] = None, 276 table_mapping: t.Optional[t.Dict[str, str]] = None, 277 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 278 ) -> None: 279 """Promotes the given collection of snapshots in the target environment by replacing a corresponding 280 view with a physical table associated with the given snapshot. 281 282 Args: 283 target_snapshots: Snapshots to promote. 284 environment_naming_info: Naming information for the target environment. 285 deployability_index: Determines snapshots that are deployable in the context of this promotion. 286 on_complete: A callback to call on each successfully promoted snapshot. 287 """ 288 289 tables_by_gateway: t.Dict[t.Union[str, None], t.List[exp.Table]] = defaultdict(list) 290 for snapshot in target_snapshots: 291 if snapshot.is_model and not snapshot.is_symbolic: 292 gateway = ( 293 snapshot.model_gateway if environment_naming_info.gateway_managed else None 294 ) 295 adapter = self.get_adapter(gateway) 296 table = snapshot.qualified_view_name.table_for_environment( 297 environment_naming_info, dialect=adapter.dialect 298 ) 299 tables_by_gateway[gateway].append(table) 300 301 # A schema can be shared across multiple engines, so we need to group by gateway 302 for gateway, tables in tables_by_gateway.items(): 303 if environment_naming_info.suffix_target.is_catalog: 304 self._create_catalogs(tables=tables, gateway=gateway) 305 306 gateway_table_pairs = [ 307 (gateway, table) for gateway, tables in tables_by_gateway.items() for table in tables 308 ] 309 self._create_schemas(gateway_table_pairs=gateway_table_pairs) 310 311 # Fetch the view data objects for the promoted snapshots to get them cached 312 self._get_virtual_data_objects(target_snapshots, environment_naming_info) 313 314 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 315 with self.concurrent_context(): 316 concurrent_apply_to_snapshots( 317 target_snapshots, 318 lambda s: self._promote_snapshot( 319 s, 320 start=start, 321 end=end, 322 execution_time=execution_time, 323 snapshots=snapshots, 324 table_mapping=table_mapping, 325 environment_naming_info=environment_naming_info, 326 deployability_index=deployability_index, # type: ignore 327 on_complete=on_complete, 328 ), 329 self.ddl_concurrent_tasks, 330 ) 331 332 def demote( 333 self, 334 target_snapshots: t.Iterable[Snapshot], 335 environment_naming_info: EnvironmentNamingInfo, 336 table_mapping: t.Optional[t.Dict[str, str]] = None, 337 deployability_index: t.Optional[DeployabilityIndex] = None, 338 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 339 ) -> None: 340 """Demotes the given collection of snapshots in the target environment by removing its view. 341 342 Args: 343 target_snapshots: Snapshots to demote. 344 environment_naming_info: Naming info for the target environment. 345 on_complete: A callback to call on each successfully demoted snapshot. 346 """ 347 with self.concurrent_context(): 348 concurrent_apply_to_snapshots( 349 target_snapshots, 350 lambda s: self._demote_snapshot( 351 s, 352 environment_naming_info, 353 deployability_index=deployability_index, 354 on_complete=on_complete, 355 table_mapping=table_mapping, 356 ), 357 self.ddl_concurrent_tasks, 358 ) 359 360 def create( 361 self, 362 target_snapshots: t.Iterable[Snapshot], 363 snapshots: t.Dict[SnapshotId, Snapshot], 364 deployability_index: t.Optional[DeployabilityIndex] = None, 365 on_start: t.Optional[t.Callable] = None, 366 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 367 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 368 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 369 ) -> CompletionStatus: 370 """Creates a physical snapshot schema and table for the given collection of snapshots. 371 372 Args: 373 target_snapshots: Target snapshots. 374 snapshots: Mapping of snapshot ID to snapshot. 375 deployability_index: Determines snapshots that are deployable in the context of this creation. 376 on_start: A callback to initialize the snapshot creation progress bar. 377 on_complete: A callback to call on each successfully created snapshot. 378 allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes. 379 allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes. 380 381 Returns: 382 CompletionStatus: The status of the creation operation (success, failure, nothing to do). 383 """ 384 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 385 386 snapshots_to_create = self.get_snapshots_to_create(target_snapshots, deployability_index) 387 if not snapshots_to_create: 388 return CompletionStatus.NOTHING_TO_DO 389 if on_start: 390 on_start(snapshots_to_create) 391 392 self._create_snapshots( 393 snapshots_to_create=snapshots_to_create, 394 snapshots={s.name: s for s in snapshots.values()}, 395 deployability_index=deployability_index, 396 on_complete=on_complete, 397 allow_destructive_snapshots=allow_destructive_snapshots or set(), 398 allow_additive_snapshots=allow_additive_snapshots or set(), 399 ) 400 return CompletionStatus.SUCCESS 401 402 def create_physical_schemas( 403 self, snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex 404 ) -> None: 405 """Creates the physical schemas for the given snapshots. 406 407 Args: 408 snapshots: Snapshots to create physical schemas for. 409 deployability_index: Determines snapshots that are deployable in the context of this creation. 410 """ 411 tables_by_gateway: t.Dict[t.Optional[str], t.List[str]] = defaultdict(list) 412 for snapshot in snapshots: 413 if snapshot.is_model and not snapshot.is_symbolic: 414 tables_by_gateway[snapshot.model_gateway].append( 415 snapshot.table_name(is_deployable=deployability_index.is_deployable(snapshot)) 416 ) 417 418 gateway_table_pairs = [ 419 (gateway, table) for gateway, tables in tables_by_gateway.items() for table in tables 420 ] 421 self._create_schemas(gateway_table_pairs=gateway_table_pairs) 422 423 def get_snapshots_to_create( 424 self, target_snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex 425 ) -> t.List[Snapshot]: 426 """Returns a list of snapshots that need to have their physical tables created. 427 428 Args: 429 target_snapshots: Target snapshots. 430 deployability_index: Determines snapshots that are deployable / representative in the context of this creation. 431 """ 432 existing_data_objects = self._get_physical_data_objects( 433 target_snapshots, deployability_index 434 ) 435 snapshots_to_create = [] 436 for snapshot in target_snapshots: 437 if not snapshot.is_model or snapshot.is_symbolic: 438 continue 439 if snapshot.snapshot_id not in existing_data_objects or ( 440 snapshot.is_seed and not snapshot.intervals 441 ): 442 snapshots_to_create.append(snapshot) 443 444 return snapshots_to_create 445 446 def _create_snapshots( 447 self, 448 snapshots_to_create: t.Iterable[Snapshot], 449 snapshots: t.Dict[str, Snapshot], 450 deployability_index: DeployabilityIndex, 451 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 452 allow_destructive_snapshots: t.Set[str], 453 allow_additive_snapshots: t.Set[str], 454 ) -> None: 455 """Internal method to create tables in parallel.""" 456 with self.concurrent_context(): 457 errors, skipped = concurrent_apply_to_snapshots( 458 snapshots_to_create, 459 lambda s: self.create_snapshot( 460 s, 461 snapshots=snapshots, 462 deployability_index=deployability_index, 463 allow_destructive_snapshots=allow_destructive_snapshots, 464 allow_additive_snapshots=allow_additive_snapshots, 465 on_complete=on_complete, 466 ), 467 self.ddl_concurrent_tasks, 468 raise_on_error=False, 469 ) 470 if errors: 471 raise SnapshotCreationFailedError(errors, skipped) 472 473 def migrate( 474 self, 475 target_snapshots: t.Iterable[Snapshot], 476 snapshots: t.Dict[SnapshotId, Snapshot], 477 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 478 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 479 deployability_index: t.Optional[DeployabilityIndex] = None, 480 ) -> None: 481 """Alters a physical snapshot table to match its snapshot's schema for the given collection of snapshots. 482 483 Args: 484 target_snapshots: Target snapshots. 485 snapshots: Mapping of snapshot ID to snapshot. 486 allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes. 487 allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes. 488 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 489 """ 490 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 491 target_data_objects = self._get_physical_data_objects(target_snapshots, deployability_index) 492 if not target_data_objects: 493 return 494 495 if not snapshots: 496 snapshots = {s.snapshot_id: s for s in target_snapshots} 497 498 allow_destructive_snapshots = allow_destructive_snapshots or set() 499 allow_additive_snapshots = allow_additive_snapshots or set() 500 snapshots_by_name = {s.name: s for s in snapshots.values()} 501 with self.concurrent_context(): 502 # Only migrate snapshots for which there's an existing data object 503 concurrent_apply_to_snapshots( 504 target_snapshots, 505 lambda s: self._migrate_snapshot( 506 s, 507 snapshots_by_name, 508 target_data_objects.get(s.snapshot_id), 509 allow_destructive_snapshots, 510 allow_additive_snapshots, 511 self.get_adapter(s.model_gateway), 512 deployability_index, 513 ), 514 self.ddl_concurrent_tasks, 515 ) 516 517 def cleanup( 518 self, 519 target_snapshots: t.Iterable[SnapshotTableCleanupTask], 520 on_complete: t.Optional[t.Callable[[str], None]] = None, 521 ) -> None: 522 """Cleans up the given snapshots by removing its table 523 524 Args: 525 target_snapshots: Snapshots to cleanup. 526 on_complete: A callback to call on each successfully deleted database object. 527 """ 528 target_snapshots = [ 529 t for t in target_snapshots if t.snapshot.is_model and not t.snapshot.is_symbolic 530 ] 531 snapshots_to_dev_table_only = { 532 t.snapshot.snapshot_id: t.dev_table_only for t in target_snapshots 533 } 534 with self.concurrent_context(): 535 concurrent_apply_to_snapshots( 536 [t.snapshot for t in target_snapshots], 537 lambda s: self._cleanup_snapshot( 538 s, 539 snapshots_to_dev_table_only[s.snapshot_id], 540 self.get_adapter(s.model_gateway), 541 on_complete, 542 ), 543 self.ddl_concurrent_tasks, 544 reverse_order=True, 545 ) 546 547 def audit( 548 self, 549 snapshot: Snapshot, 550 *, 551 snapshots: t.Dict[str, Snapshot], 552 start: t.Optional[TimeLike] = None, 553 end: t.Optional[TimeLike] = None, 554 execution_time: t.Optional[TimeLike] = None, 555 deployability_index: t.Optional[DeployabilityIndex] = None, 556 wap_id: t.Optional[str] = None, 557 **kwargs: t.Any, 558 ) -> t.List[AuditResult]: 559 """Execute a snapshot's node's audit queries. 560 561 Args: 562 snapshot: Snapshot to evaluate. 563 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 564 start: The start datetime to audit. Defaults to epoch start. 565 end: The end datetime to audit. Defaults to epoch start. 566 execution_time: The date/time time reference to use for execution time. 567 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 568 wap_id: The WAP ID if applicable, None otherwise. 569 kwargs: Additional kwargs to pass to the renderer. 570 """ 571 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 572 adapter = self.get_adapter(snapshot.model_gateway) 573 574 if not snapshot.version: 575 raise ConfigError( 576 f"Cannot audit '{snapshot.name}' because it has not been versioned yet. Apply a plan first." 577 ) 578 579 if wap_id is not None: 580 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 581 original_table_name = snapshot.table_name( 582 is_deployable=deployability_index.is_deployable(snapshot) 583 ) 584 wap_table_name = adapter.wap_table_name(original_table_name, wap_id) 585 logger.info( 586 "Auditing WAP table '%s', snapshot %s", 587 wap_table_name, 588 snapshot.snapshot_id, 589 ) 590 591 table_mapping = kwargs.get("table_mapping") or {} 592 table_mapping[snapshot.name] = wap_table_name 593 kwargs["table_mapping"] = table_mapping 594 kwargs["this_model"] = exp.to_table(wap_table_name, dialect=adapter.dialect) 595 596 results = [] 597 598 audits_with_args = snapshot.node.audits_with_args 599 600 force_non_blocking = False 601 602 if audits_with_args: 603 logger.info("Auditing snapshot %s", snapshot.snapshot_id) 604 605 if not deployability_index.is_deployable(snapshot) and not adapter.SUPPORTS_CLONING: 606 # For dev preview tables that aren't based on clones of the production table, only a subset of the data is typically available 607 # However, users still expect audits to run anwyay. Some audits (such as row count) are practically guaranteed to fail 608 # when run on only a subset of data, so we switch all audits to non blocking and the user can decide if they still want to proceed 609 force_non_blocking = True 610 611 for audit, audit_args in audits_with_args: 612 if force_non_blocking: 613 # remove any blocking indicator on the model itself 614 audit_args.pop("blocking", None) 615 # so that we can fall back to the audit's setting, which we override to blocking: False 616 audit = audit.model_copy(update={"blocking": False}) 617 618 results.append( 619 self._audit( 620 audit=audit, 621 audit_args=audit_args, 622 snapshot=snapshot, 623 snapshots=snapshots, 624 start=start, 625 end=end, 626 execution_time=execution_time, 627 deployability_index=deployability_index, 628 **kwargs, 629 ) 630 ) 631 632 if wap_id is not None: 633 logger.info( 634 "Publishing evaluation results for snapshot %s, WAP ID '%s'", 635 snapshot.snapshot_id, 636 wap_id, 637 ) 638 self.wap_publish_snapshot(snapshot, wap_id, deployability_index) 639 640 return results 641 642 @contextmanager 643 def concurrent_context(self) -> t.Iterator[None]: 644 try: 645 yield 646 finally: 647 self.recycle() 648 649 def recycle(self) -> None: 650 """Closes all open connections and releases all allocated resources associated with any thread 651 except the calling one.""" 652 try: 653 for adapter in self.adapters.values(): 654 adapter.recycle() 655 656 except Exception: 657 logger.exception("Failed to recycle Snapshot Evaluator") 658 659 def close(self) -> None: 660 """Closes all open connections and releases all allocated resources.""" 661 try: 662 for adapter in self.adapters.values(): 663 adapter.close() 664 except Exception: 665 logger.exception("Failed to close Snapshot Evaluator") 666 667 def set_correlation_id(self, correlation_id: CorrelationId) -> SnapshotEvaluator: 668 return SnapshotEvaluator( 669 { 670 gateway: adapter.with_settings(correlation_id=correlation_id) 671 for gateway, adapter in self.adapters.items() 672 }, 673 self.ddl_concurrent_tasks, 674 self.selected_gateway, 675 ) 676 677 def _evaluate_snapshot( 678 self, 679 start: TimeLike, 680 end: TimeLike, 681 execution_time: TimeLike, 682 snapshot: Snapshot, 683 snapshots: t.Dict[str, Snapshot], 684 allow_destructive_snapshots: t.Set[str], 685 allow_additive_snapshots: t.Set[str], 686 deployability_index: t.Optional[DeployabilityIndex], 687 batch_index: int, 688 target_table_exists: t.Optional[bool], 689 **kwargs: t.Any, 690 ) -> t.Optional[str]: 691 """Renders the snapshot's model and executes it. The return value depends on whether the limit was specified. 692 693 Args: 694 snapshot: Snapshot to evaluate. 695 start: The start datetime to render. 696 end: The end datetime to render. 697 execution_time: The date/time time reference to use for execution time. 698 snapshots: All upstream snapshots to use for expansion and mapping of physical locations. 699 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 700 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 701 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 702 batch_index: If the snapshot is part of a batch of related snapshots; which index in the batch is it 703 target_table_exists: Whether the target table exists. If None, the table will be checked for existence. 704 kwargs: Additional kwargs to pass to the renderer. 705 """ 706 if not snapshot.is_model: 707 return None 708 709 model = snapshot.model 710 711 logger.info("Evaluating snapshot %s", snapshot.snapshot_id) 712 713 adapter = self.get_adapter(model.gateway) 714 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 715 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 716 target_table_name = snapshot.table_name(is_deployable=is_snapshot_deployable) 717 # https://github.com/SQLMesh/sqlmesh/issues/2609 718 # If there are no existing intervals yet; only consider this a first insert for the first snapshot in the batch 719 if target_table_exists is None: 720 target_table_exists = adapter.table_exists(target_table_name) 721 is_first_insert = ( 722 not _intervals(snapshot, deployability_index) or not target_table_exists 723 ) and batch_index == 0 724 725 # Use the 'creating' stage if the table doesn't exist yet to preserve backwards compatibility with existing projects 726 # that depend on a separate physical table creation stage. 727 runtime_stage = RuntimeStage.EVALUATING if target_table_exists else RuntimeStage.CREATING 728 common_render_kwargs = dict( 729 start=start, 730 end=end, 731 execution_time=execution_time, 732 snapshot=snapshot, 733 runtime_stage=runtime_stage, 734 **kwargs, 735 ) 736 create_render_kwargs = dict( 737 engine_adapter=adapter, 738 snapshots=snapshots, 739 deployability_index=deployability_index, 740 **common_render_kwargs, 741 ) 742 create_render_kwargs["runtime_stage"] = RuntimeStage.CREATING 743 render_statements_kwargs = dict( 744 engine_adapter=adapter, 745 snapshots=snapshots, 746 deployability_index=deployability_index, 747 **common_render_kwargs, 748 ) 749 rendered_physical_properties = snapshot.model.render_physical_properties( 750 **render_statements_kwargs 751 ) 752 753 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 754 evaluation_strategy.run_pre_statements( 755 snapshot=snapshot, 756 render_kwargs={**render_statements_kwargs, "inside_transaction": False}, 757 ) 758 759 with ( 760 adapter.transaction(), 761 adapter.session(snapshot.model.render_session_properties(**render_statements_kwargs)), 762 ): 763 evaluation_strategy.run_pre_statements( 764 snapshot=snapshot, 765 render_kwargs={**render_statements_kwargs, "inside_transaction": True}, 766 ) 767 768 if not target_table_exists or (model.is_seed and not snapshot.intervals): 769 # Only create the empty table if the columns were provided explicitly by the user 770 should_create_empty_table = ( 771 model.kind.is_materialized 772 and model.columns_to_types_ 773 and columns_to_types_all_known(model.columns_to_types_) 774 ) 775 if not should_create_empty_table: 776 # Or if the model is self-referential and its query is fully annotated with types 777 should_create_empty_table = model.depends_on_self and model.annotated 778 if self._can_clone(snapshot, deployability_index): 779 self._clone_snapshot_in_dev( 780 snapshot=snapshot, 781 snapshots=snapshots, 782 deployability_index=deployability_index, 783 render_kwargs=create_render_kwargs, 784 rendered_physical_properties=rendered_physical_properties.copy(), 785 allow_destructive_snapshots=allow_destructive_snapshots, 786 allow_additive_snapshots=allow_additive_snapshots, 787 ) 788 runtime_stage = RuntimeStage.EVALUATING 789 target_table_exists = True 790 elif should_create_empty_table or model.is_seed or model.kind.is_scd_type_2: 791 self._execute_create( 792 snapshot=snapshot, 793 table_name=target_table_name, 794 is_table_deployable=is_snapshot_deployable, 795 deployability_index=deployability_index, 796 create_render_kwargs=create_render_kwargs, 797 rendered_physical_properties=rendered_physical_properties.copy(), 798 dry_run=False, 799 run_pre_post_statements=False, 800 ) 801 runtime_stage = RuntimeStage.EVALUATING 802 target_table_exists = True 803 804 evaluate_render_kwargs = { 805 **common_render_kwargs, 806 "runtime_stage": runtime_stage, 807 "snapshot_table_exists": target_table_exists, 808 } 809 810 wap_id: t.Optional[str] = None 811 if ( 812 snapshot.is_materialized 813 and target_table_exists 814 and adapter.wap_enabled 815 and (model.wap_supported or adapter.wap_supported(target_table_name)) 816 ): 817 wap_id = random_id()[0:8] 818 logger.info("Using WAP ID '%s' for snapshot %s", wap_id, snapshot.snapshot_id) 819 target_table_name = adapter.wap_prepare(target_table_name, wap_id) 820 821 self._render_and_insert_snapshot( 822 start=start, 823 end=end, 824 execution_time=execution_time, 825 snapshot=snapshot, 826 snapshots=snapshots, 827 render_kwargs=evaluate_render_kwargs, 828 create_render_kwargs=create_render_kwargs, 829 rendered_physical_properties=rendered_physical_properties, 830 deployability_index=deployability_index, 831 target_table_name=target_table_name, 832 is_first_insert=is_first_insert, 833 batch_index=batch_index, 834 ) 835 836 evaluation_strategy.run_post_statements( 837 snapshot=snapshot, 838 render_kwargs={**render_statements_kwargs, "inside_transaction": True}, 839 ) 840 841 evaluation_strategy.run_post_statements( 842 snapshot=snapshot, 843 render_kwargs={**render_statements_kwargs, "inside_transaction": False}, 844 ) 845 846 return wap_id 847 848 def create_snapshot( 849 self, 850 snapshot: Snapshot, 851 snapshots: t.Dict[str, Snapshot], 852 deployability_index: DeployabilityIndex, 853 allow_destructive_snapshots: t.Set[str], 854 allow_additive_snapshots: t.Set[str], 855 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 856 ) -> None: 857 """Creates a physical table for the given snapshot. 858 859 Args: 860 snapshot: Snapshot to create. 861 snapshots: All upstream snapshots to use for expansion and mapping of physical locations. 862 deployability_index: Determines snapshots that are deployable in the context of this creation. 863 on_complete: A callback to call on each successfully created database object. 864 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 865 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 866 """ 867 if not snapshot.is_model: 868 return 869 870 logger.info("Creating a physical table for snapshot %s", snapshot.snapshot_id) 871 872 adapter = self.get_adapter(snapshot.model.gateway) 873 create_render_kwargs: t.Dict[str, t.Any] = dict( 874 engine_adapter=adapter, 875 snapshots=snapshots, 876 runtime_stage=RuntimeStage.CREATING, 877 deployability_index=deployability_index, 878 ) 879 880 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 881 evaluation_strategy.run_pre_statements( 882 snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} 883 ) 884 885 with ( 886 adapter.transaction(), 887 adapter.session(snapshot.model.render_session_properties(**create_render_kwargs)), 888 ): 889 rendered_physical_properties = snapshot.model.render_physical_properties( 890 **create_render_kwargs 891 ) 892 893 if self._can_clone(snapshot, deployability_index): 894 self._clone_snapshot_in_dev( 895 snapshot=snapshot, 896 snapshots=snapshots, 897 deployability_index=deployability_index, 898 render_kwargs=create_render_kwargs, 899 rendered_physical_properties=rendered_physical_properties, 900 allow_destructive_snapshots=allow_destructive_snapshots, 901 allow_additive_snapshots=allow_additive_snapshots, 902 run_pre_post_statements=True, 903 ) 904 else: 905 is_table_deployable = deployability_index.is_deployable(snapshot) 906 self._execute_create( 907 snapshot=snapshot, 908 table_name=snapshot.table_name(is_deployable=is_table_deployable), 909 is_table_deployable=is_table_deployable, 910 deployability_index=deployability_index, 911 create_render_kwargs=create_render_kwargs, 912 rendered_physical_properties=rendered_physical_properties, 913 dry_run=True, 914 ) 915 916 evaluation_strategy.run_post_statements( 917 snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} 918 ) 919 920 if on_complete is not None: 921 on_complete(snapshot) 922 923 def wap_publish_snapshot( 924 self, 925 snapshot: Snapshot, 926 wap_id: str, 927 deployability_index: t.Optional[DeployabilityIndex], 928 ) -> None: 929 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 930 table_name = snapshot.table_name(is_deployable=deployability_index.is_deployable(snapshot)) 931 adapter = self.get_adapter(snapshot.model_gateway) 932 adapter.wap_publish(table_name, wap_id) 933 934 def _render_and_insert_snapshot( 935 self, 936 start: TimeLike, 937 end: TimeLike, 938 execution_time: TimeLike, 939 snapshot: Snapshot, 940 snapshots: t.Dict[str, Snapshot], 941 render_kwargs: t.Dict[str, t.Any], 942 create_render_kwargs: t.Dict[str, t.Any], 943 rendered_physical_properties: t.Dict[str, exp.Expr], 944 deployability_index: DeployabilityIndex, 945 target_table_name: str, 946 is_first_insert: bool, 947 batch_index: int, 948 ) -> None: 949 if not snapshot.is_model or snapshot.is_seed: 950 return 951 952 logger.info("Inserting data for snapshot %s", snapshot.snapshot_id) 953 954 model = snapshot.model 955 adapter = self.get_adapter(model.gateway) 956 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 957 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 958 959 queries_or_dfs = self._render_snapshot_for_evaluation( 960 snapshot, 961 snapshots, 962 deployability_index, 963 render_kwargs, 964 ) 965 966 def apply(query_or_df: QueryOrDF, index: int = 0) -> None: 967 if index > 0: 968 evaluation_strategy.append( 969 table_name=target_table_name, 970 query_or_df=query_or_df, 971 model=snapshot.model, 972 snapshot=snapshot, 973 snapshots=snapshots, 974 deployability_index=deployability_index, 975 batch_index=batch_index, 976 start=start, 977 end=end, 978 execution_time=execution_time, 979 physical_properties=rendered_physical_properties, 980 render_kwargs=create_render_kwargs, 981 is_snapshot_deployable=is_snapshot_deployable, 982 ) 983 else: 984 logger.info( 985 "Inserting batch (%s, %s) into %s'", 986 time_like_to_str(start), 987 time_like_to_str(end), 988 target_table_name, 989 ) 990 evaluation_strategy.insert( 991 table_name=target_table_name, 992 query_or_df=query_or_df, 993 is_first_insert=is_first_insert, 994 model=snapshot.model, 995 snapshot=snapshot, 996 snapshots=snapshots, 997 deployability_index=deployability_index, 998 batch_index=batch_index, 999 start=start, 1000 end=end, 1001 execution_time=execution_time, 1002 physical_properties=rendered_physical_properties, 1003 render_kwargs=create_render_kwargs, 1004 is_snapshot_deployable=is_snapshot_deployable, 1005 ) 1006 1007 # DataFrames, unlike SQL expressions, can provide partial results by yielding dataframes. As a result, 1008 # if the engine supports INSERT OVERWRITE or REPLACE WHERE and the snapshot is incremental by time range, we risk 1009 # having a partial result since each dataframe write can re-truncate partitions. To avoid this, we 1010 # union all the dataframes together before writing. For pandas this could result in OOM and a potential 1011 # workaround for that would be to serialize pandas to disk and then read it back with Spark. 1012 # Note: We assume that if multiple things are yielded from `queries_or_dfs` that they are dataframes 1013 # and not SQL expressions. 1014 if ( 1015 adapter.INSERT_OVERWRITE_STRATEGY 1016 in ( 1017 InsertOverwriteStrategy.INSERT_OVERWRITE, 1018 InsertOverwriteStrategy.REPLACE_WHERE, 1019 ) 1020 and snapshot.is_incremental_by_time_range 1021 ): 1022 import pandas as pd 1023 1024 try: 1025 first_query_or_df = next(queries_or_dfs) 1026 except StopIteration: 1027 return 1028 1029 query_or_df = reduce( 1030 lambda a, b: ( 1031 pd.concat([a, b], ignore_index=True) # type: ignore 1032 if isinstance(a, pd.DataFrame) 1033 else a.union_all(b) # type: ignore 1034 ), # type: ignore 1035 queries_or_dfs, 1036 first_query_or_df, 1037 ) 1038 apply(query_or_df, index=0) 1039 else: 1040 for index, query_or_df in enumerate(queries_or_dfs): 1041 apply(query_or_df, index) 1042 1043 def _render_snapshot_for_evaluation( 1044 self, 1045 snapshot: Snapshot, 1046 snapshots: t.Dict[str, Snapshot], 1047 deployability_index: DeployabilityIndex, 1048 render_kwargs: t.Dict[str, t.Any], 1049 ) -> t.Iterator[QueryOrDF]: 1050 from sqlmesh.core.context import ExecutionContext 1051 1052 model = snapshot.model 1053 adapter = self.get_adapter(model.gateway) 1054 1055 return model.render( 1056 context=ExecutionContext( 1057 adapter, 1058 snapshots, 1059 deployability_index, 1060 default_dialect=model.dialect, 1061 default_catalog=model.default_catalog, 1062 ), 1063 **render_kwargs, 1064 ) 1065 1066 def _clone_snapshot_in_dev( 1067 self, 1068 snapshot: Snapshot, 1069 snapshots: t.Dict[str, Snapshot], 1070 deployability_index: DeployabilityIndex, 1071 render_kwargs: t.Dict[str, t.Any], 1072 rendered_physical_properties: t.Dict[str, exp.Expr], 1073 allow_destructive_snapshots: t.Set[str], 1074 allow_additive_snapshots: t.Set[str], 1075 run_pre_post_statements: bool = False, 1076 ) -> None: 1077 adapter = self.get_adapter(snapshot.model.gateway) 1078 1079 target_table_name = snapshot.table_name(is_deployable=False) 1080 source_table_name = snapshot.table_name() 1081 1082 try: 1083 logger.info(f"Cloning table '{source_table_name}' into '{target_table_name}'") 1084 adapter.clone_table( 1085 target_table_name, 1086 snapshot.table_name(), 1087 rendered_physical_properties=rendered_physical_properties, 1088 ) 1089 self._migrate_target_table( 1090 target_table_name=target_table_name, 1091 snapshot=snapshot, 1092 snapshots=snapshots, 1093 deployability_index=deployability_index, 1094 render_kwargs=render_kwargs, 1095 rendered_physical_properties=rendered_physical_properties, 1096 allow_destructive_snapshots=allow_destructive_snapshots, 1097 allow_additive_snapshots=allow_additive_snapshots, 1098 run_pre_post_statements=run_pre_post_statements, 1099 ) 1100 1101 except Exception: 1102 adapter.drop_table(target_table_name) 1103 raise 1104 1105 def _migrate_snapshot( 1106 self, 1107 snapshot: Snapshot, 1108 snapshots: t.Dict[str, Snapshot], 1109 target_data_object: t.Optional[DataObject], 1110 allow_destructive_snapshots: t.Set[str], 1111 allow_additive_snapshots: t.Set[str], 1112 adapter: EngineAdapter, 1113 deployability_index: DeployabilityIndex, 1114 ) -> None: 1115 if not snapshot.is_model or snapshot.is_symbolic: 1116 return 1117 1118 deployability_index = DeployabilityIndex.all_deployable() 1119 render_kwargs: t.Dict[str, t.Any] = dict( 1120 engine_adapter=adapter, 1121 snapshots=snapshots, 1122 runtime_stage=RuntimeStage.CREATING, 1123 deployability_index=deployability_index, 1124 ) 1125 target_table_name = snapshot.table_name() 1126 1127 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1128 evaluation_strategy.run_pre_statements( 1129 snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} 1130 ) 1131 1132 with ( 1133 adapter.transaction(), 1134 adapter.session(snapshot.model.render_session_properties(**render_kwargs)), 1135 ): 1136 table_exists = target_data_object is not None 1137 if adapter.drop_data_object_on_type_mismatch( 1138 target_data_object, _snapshot_to_data_object_type(snapshot) 1139 ): 1140 table_exists = False 1141 1142 rendered_physical_properties = snapshot.model.render_physical_properties( 1143 **render_kwargs 1144 ) 1145 1146 if table_exists: 1147 self._migrate_target_table( 1148 target_table_name=target_table_name, 1149 snapshot=snapshot, 1150 snapshots=snapshots, 1151 deployability_index=deployability_index, 1152 render_kwargs=render_kwargs, 1153 rendered_physical_properties=rendered_physical_properties, 1154 allow_destructive_snapshots=allow_destructive_snapshots, 1155 allow_additive_snapshots=allow_additive_snapshots, 1156 run_pre_post_statements=True, 1157 ) 1158 else: 1159 self._execute_create( 1160 snapshot=snapshot, 1161 table_name=snapshot.table_name(is_deployable=True), 1162 is_table_deployable=True, 1163 deployability_index=deployability_index, 1164 create_render_kwargs=render_kwargs, 1165 rendered_physical_properties=rendered_physical_properties, 1166 dry_run=True, 1167 ) 1168 1169 evaluation_strategy.run_post_statements( 1170 snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} 1171 ) 1172 1173 # Retry in case when the table is migrated concurrently from another plan application 1174 @retry( 1175 reraise=True, 1176 stop=stop_after_attempt(5), 1177 wait=wait_exponential(min=1, max=16), 1178 retry=retry_if_not_exception_type( 1179 (DestructiveChangeError, AdditiveChangeError, MigrationNotSupportedError) 1180 ), 1181 ) 1182 def _migrate_target_table( 1183 self, 1184 target_table_name: str, 1185 snapshot: Snapshot, 1186 snapshots: t.Dict[str, Snapshot], 1187 deployability_index: DeployabilityIndex, 1188 render_kwargs: t.Dict[str, t.Any], 1189 rendered_physical_properties: t.Dict[str, exp.Expr], 1190 allow_destructive_snapshots: t.Set[str], 1191 allow_additive_snapshots: t.Set[str], 1192 run_pre_post_statements: bool = False, 1193 ) -> None: 1194 adapter = self.get_adapter(snapshot.model.gateway) 1195 1196 tmp_table = exp.to_table(target_table_name) 1197 tmp_table.this.set("this", f"{tmp_table.name}_schema_tmp") 1198 tmp_table_name = tmp_table.sql() 1199 1200 if snapshot.is_materialized: 1201 self._execute_create( 1202 snapshot=snapshot, 1203 table_name=tmp_table_name, 1204 is_table_deployable=False, 1205 deployability_index=deployability_index, 1206 create_render_kwargs=render_kwargs, 1207 rendered_physical_properties=rendered_physical_properties, 1208 dry_run=False, 1209 run_pre_post_statements=run_pre_post_statements, 1210 skip_grants=True, # skip grants for tmp table 1211 ) 1212 try: 1213 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1214 logger.info( 1215 "Migrating table schema from '%s' to '%s'", 1216 tmp_table_name, 1217 target_table_name, 1218 ) 1219 evaluation_strategy.migrate( 1220 target_table_name=target_table_name, 1221 source_table_name=tmp_table_name, 1222 snapshot=snapshot, 1223 snapshots=snapshots, 1224 allow_destructive_snapshots=allow_destructive_snapshots, 1225 allow_additive_snapshots=allow_additive_snapshots, 1226 ignore_destructive=snapshot.model.on_destructive_change.is_ignore, 1227 ignore_additive=snapshot.model.on_additive_change.is_ignore, 1228 deployability_index=deployability_index, 1229 ) 1230 finally: 1231 if snapshot.is_materialized: 1232 adapter.drop_table(tmp_table_name) 1233 1234 def _promote_snapshot( 1235 self, 1236 snapshot: Snapshot, 1237 environment_naming_info: EnvironmentNamingInfo, 1238 deployability_index: DeployabilityIndex, 1239 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 1240 start: t.Optional[TimeLike] = None, 1241 end: t.Optional[TimeLike] = None, 1242 execution_time: t.Optional[TimeLike] = None, 1243 snapshots: t.Optional[t.Dict[SnapshotId, Snapshot]] = None, 1244 table_mapping: t.Optional[t.Dict[str, str]] = None, 1245 ) -> None: 1246 if not snapshot.is_model: 1247 return 1248 1249 adapter = ( 1250 self.get_adapter(snapshot.model_gateway) 1251 if environment_naming_info.gateway_managed 1252 else self.adapter 1253 ) 1254 table_name = snapshot.table_name(deployability_index.is_representative(snapshot)) 1255 view_name = snapshot.qualified_view_name.for_environment( 1256 environment_naming_info, dialect=adapter.dialect 1257 ) 1258 render_kwargs: t.Dict[str, t.Any] = dict( 1259 start=start, 1260 end=end, 1261 execution_time=execution_time, 1262 engine_adapter=adapter, 1263 deployability_index=deployability_index, 1264 table_mapping=table_mapping, 1265 runtime_stage=RuntimeStage.PROMOTING, 1266 ) 1267 1268 with ( 1269 adapter.transaction(), 1270 adapter.session(snapshot.model.render_session_properties(**render_kwargs)), 1271 ): 1272 _evaluation_strategy(snapshot, adapter).promote( 1273 table_name=table_name, 1274 view_name=view_name, 1275 model=snapshot.model, 1276 environment=environment_naming_info.name, 1277 snapshots=snapshots, 1278 snapshot=snapshot, 1279 **render_kwargs, 1280 ) 1281 1282 snapshot_by_name = {s.name: s for s in (snapshots or {}).values()} 1283 render_kwargs["snapshots"] = snapshot_by_name 1284 adapter.execute(snapshot.model.render_on_virtual_update(**render_kwargs)) 1285 1286 if on_complete is not None: 1287 on_complete(snapshot) 1288 1289 def _demote_snapshot( 1290 self, 1291 snapshot: Snapshot, 1292 environment_naming_info: EnvironmentNamingInfo, 1293 deployability_index: t.Optional[DeployabilityIndex], 1294 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 1295 table_mapping: t.Optional[t.Dict[str, str]] = None, 1296 ) -> None: 1297 if not snapshot.is_model: 1298 return 1299 1300 adapter = ( 1301 self.get_adapter(snapshot.model_gateway) 1302 if environment_naming_info.gateway_managed 1303 else self.adapter 1304 ) 1305 view_name = snapshot.qualified_view_name.for_environment( 1306 environment_naming_info, dialect=adapter.dialect 1307 ) 1308 with ( 1309 adapter.transaction(), 1310 adapter.session( 1311 snapshot.model.render_session_properties( 1312 engine_adapter=adapter, 1313 deployability_index=deployability_index, 1314 table_mapping=table_mapping, 1315 runtime_stage=RuntimeStage.DEMOTING, 1316 ) 1317 ), 1318 ): 1319 _evaluation_strategy(snapshot, adapter).demote(view_name) 1320 1321 if on_complete is not None: 1322 on_complete(snapshot) 1323 1324 def _cleanup_snapshot( 1325 self, 1326 snapshot: SnapshotInfoLike, 1327 dev_table_only: bool, 1328 adapter: EngineAdapter, 1329 on_complete: t.Optional[t.Callable[[str], None]], 1330 ) -> None: 1331 snapshot = snapshot.table_info 1332 1333 table_names = [(False, snapshot.table_name(is_deployable=False))] 1334 if not dev_table_only: 1335 table_names.append((True, snapshot.table_name(is_deployable=True))) 1336 1337 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1338 for is_table_deployable, table_name in table_names: 1339 try: 1340 evaluation_strategy.delete( 1341 table_name, 1342 is_table_deployable=is_table_deployable, 1343 physical_schema=snapshot.physical_schema, 1344 # we need to set cascade=true or we will get a 'cant drop because other objects depend on it'-style 1345 # error on engines that enforce referential integrity, such as Postgres 1346 # this situation can happen when a snapshot expires but downstream view snapshots that reference it have not yet expired 1347 cascade=True, 1348 ) 1349 except Exception: 1350 # Use `get_data_object` to check if the table exists instead of `table_exists` since the former 1351 # is based on `INFORMATION_SCHEMA` and avoids touching the table directly. 1352 # This is important when the table name is malformed for some reason and running any statement 1353 # that touches the table would result in an error. 1354 if adapter.get_data_object(table_name) is not None: 1355 raise 1356 logger.warning( 1357 "Skipping cleanup of table '%s' because it does not exist", table_name 1358 ) 1359 1360 if on_complete is not None: 1361 on_complete(table_name) 1362 1363 def _audit( 1364 self, 1365 audit: Audit, 1366 audit_args: t.Dict[t.Any, t.Any], 1367 snapshot: Snapshot, 1368 snapshots: t.Dict[str, Snapshot], 1369 start: t.Optional[TimeLike], 1370 end: t.Optional[TimeLike], 1371 execution_time: t.Optional[TimeLike], 1372 deployability_index: t.Optional[DeployabilityIndex], 1373 **kwargs: t.Any, 1374 ) -> AuditResult: 1375 if audit.skip: 1376 return AuditResult( 1377 audit=audit, 1378 audit_args=audit_args, 1379 model=snapshot.model_or_none, 1380 skipped=True, 1381 ) 1382 1383 # Model's "blocking" argument takes precedence over the audit's default setting 1384 blocking = audit_args.pop("blocking", None) 1385 blocking = blocking == exp.true() if blocking else audit.blocking 1386 1387 adapter = self.get_adapter(snapshot.model_gateway) 1388 1389 kwargs = { 1390 "start": start, 1391 "end": end, 1392 "execution_time": execution_time, 1393 "snapshots": snapshots, 1394 "deployability_index": deployability_index, 1395 "engine_adapter": adapter, 1396 "runtime_stage": RuntimeStage.AUDITING, 1397 **audit_args, 1398 **kwargs, 1399 } 1400 1401 if snapshot.is_model: 1402 query = snapshot.model.render_audit_query(audit, **kwargs) 1403 elif isinstance(audit, StandaloneAudit): 1404 query = audit.render_audit_query(**kwargs) 1405 else: 1406 raise SQLMeshError("Expected model or standalone audit. {snapshot}: {audit}") 1407 1408 count, *_ = adapter.fetchone( 1409 select("COUNT(*)").from_(query.subquery("audit")), 1410 quote_identifiers=True, 1411 ) # type: ignore 1412 1413 return AuditResult( 1414 audit=audit, 1415 audit_args=audit_args, 1416 model=snapshot.model_or_none, 1417 count=count, 1418 query=query, 1419 blocking=blocking, 1420 ) 1421 1422 def _create_catalogs( 1423 self, 1424 tables: t.Iterable[t.Union[exp.Table, str]], 1425 gateway: t.Optional[str] = None, 1426 ) -> None: 1427 # attempt to create catalogs for the virtual layer if possible 1428 adapter = self.get_adapter(gateway) 1429 if adapter.SUPPORTS_CREATE_DROP_CATALOG: 1430 unique_catalogs = {t.catalog for t in [exp.to_table(maybe_t) for maybe_t in tables]} 1431 for catalog_name in unique_catalogs: 1432 adapter.create_catalog(catalog_name) 1433 1434 def _create_schemas( 1435 self, 1436 gateway_table_pairs: t.Iterable[t.Tuple[t.Optional[str], t.Union[exp.Table, str]]], 1437 ) -> None: 1438 table_exprs = [(gateway, exp.to_table(t)) for gateway, t in gateway_table_pairs] 1439 unique_schemas = { 1440 (gateway, t.args["db"], t.args.get("catalog")) 1441 for gateway, t in table_exprs 1442 if t and t.db 1443 } 1444 1445 def _create_schema( 1446 gateway: t.Optional[str], schema_name: str, catalog: t.Optional[str] 1447 ) -> None: 1448 schema = schema_(schema_name, catalog) 1449 logger.info("Creating schema '%s'", schema) 1450 adapter = self.get_adapter(gateway) 1451 adapter.create_schema(schema) 1452 1453 with self.concurrent_context(): 1454 concurrent_apply_to_values( 1455 list(unique_schemas), 1456 lambda item: _create_schema(item[0], item[1], item[2]), 1457 self.ddl_concurrent_tasks, 1458 ) 1459 1460 def get_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: 1461 """Returns the adapter for the specified gateway or the default adapter if none is provided.""" 1462 if gateway: 1463 if adapter := self.adapters.get(gateway): 1464 return adapter 1465 raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") 1466 return self.adapter 1467 1468 def _execute_create( 1469 self, 1470 snapshot: Snapshot, 1471 table_name: str, 1472 is_table_deployable: bool, 1473 deployability_index: DeployabilityIndex, 1474 create_render_kwargs: t.Dict[str, t.Any], 1475 rendered_physical_properties: t.Dict[str, exp.Expr], 1476 dry_run: bool, 1477 run_pre_post_statements: bool = True, 1478 skip_grants: bool = False, 1479 ) -> None: 1480 adapter = self.get_adapter(snapshot.model.gateway) 1481 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1482 1483 # It can still be useful for some strategies to know if the snapshot was actually deployable 1484 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 1485 is_snapshot_representative = deployability_index.is_representative(snapshot) 1486 1487 create_render_kwargs = { 1488 **create_render_kwargs, 1489 "table_mapping": {snapshot.name: table_name}, 1490 } 1491 if run_pre_post_statements: 1492 evaluation_strategy.run_pre_statements( 1493 snapshot=snapshot, 1494 render_kwargs={**create_render_kwargs, "inside_transaction": True}, 1495 ) 1496 evaluation_strategy.create( 1497 table_name=table_name, 1498 model=snapshot.model, 1499 is_table_deployable=is_table_deployable, 1500 skip_grants=skip_grants, 1501 render_kwargs=create_render_kwargs, 1502 is_snapshot_deployable=is_snapshot_deployable, 1503 is_snapshot_representative=is_snapshot_representative, 1504 dry_run=dry_run, 1505 physical_properties=rendered_physical_properties, 1506 snapshot=snapshot, 1507 deployability_index=deployability_index, 1508 ) 1509 if run_pre_post_statements: 1510 evaluation_strategy.run_post_statements( 1511 snapshot=snapshot, 1512 render_kwargs={**create_render_kwargs, "inside_transaction": True}, 1513 ) 1514 1515 def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex) -> bool: 1516 adapter = self.get_adapter(snapshot.model.gateway) 1517 return ( 1518 snapshot.is_forward_only 1519 and snapshot.is_materialized 1520 and bool(snapshot.previous_versions) 1521 and adapter.SUPPORTS_CLONING 1522 # managed models cannot have their schema mutated because they're based on queries, so clone + alter won't work 1523 and not snapshot.is_managed 1524 and not snapshot.is_dbt_custom 1525 and not deployability_index.is_deployable(snapshot) 1526 # If the deployable table is missing we can't clone it 1527 and adapter.table_exists(snapshot.table_name()) 1528 ) 1529 1530 def _get_physical_data_objects( 1531 self, 1532 target_snapshots: t.Iterable[Snapshot], 1533 deployability_index: DeployabilityIndex, 1534 ) -> t.Dict[SnapshotId, DataObject]: 1535 """Returns a dictionary of snapshot IDs to existing data objects of their physical tables. 1536 1537 Args: 1538 target_snapshots: Target snapshots. 1539 deployability_index: The deployability index to determine whether to look for a deployable or 1540 a non-deployable physical table. 1541 1542 Returns: 1543 A dictionary of snapshot IDs to existing data objects of their physical tables. If the data object 1544 for a snapshot is not found, it will not be included in the dictionary. 1545 """ 1546 return self._get_data_objects( 1547 target_snapshots, 1548 lambda s: exp.to_table( 1549 s.table_name(deployability_index.is_deployable(s)), dialect=s.model.dialect 1550 ), 1551 ) 1552 1553 def _get_virtual_data_objects( 1554 self, 1555 target_snapshots: t.Iterable[Snapshot], 1556 environment_naming_info: EnvironmentNamingInfo, 1557 ) -> t.Dict[SnapshotId, DataObject]: 1558 """Returns a dictionary of snapshot IDs to existing data objects of their virtual views. 1559 1560 Args: 1561 target_snapshots: Target snapshots. 1562 environment_naming_info: The environment naming info of the target virtual environment. 1563 1564 Returns: 1565 A dictionary of snapshot IDs to existing data objects of their virtual views. If the data object 1566 for a snapshot is not found, it will not be included in the dictionary. 1567 """ 1568 1569 def _get_view_name(s: Snapshot) -> exp.Table: 1570 adapter = ( 1571 self.get_adapter(s.model_gateway) 1572 if environment_naming_info.gateway_managed 1573 else self.adapter 1574 ) 1575 return exp.to_table( 1576 s.qualified_view_name.for_environment( 1577 environment_naming_info, dialect=adapter.dialect 1578 ), 1579 dialect=adapter.dialect, 1580 ) 1581 1582 return self._get_data_objects(target_snapshots, _get_view_name) 1583 1584 def _get_data_objects( 1585 self, 1586 target_snapshots: t.Iterable[Snapshot], 1587 table_name_callable: t.Callable[[Snapshot], exp.Table], 1588 ) -> t.Dict[SnapshotId, DataObject]: 1589 """Returns a dictionary of snapshot IDs to existing data objects. 1590 1591 Args: 1592 target_snapshots: Target snapshots. 1593 table_name_callable: A function that takes a snapshot and returns the table to look for. 1594 1595 Returns: 1596 A dictionary of snapshot IDs to existing data objects. If the data object for a snapshot is not found, 1597 it will not be included in the dictionary. 1598 """ 1599 tables_by_gateway_and_schema: t.Dict[t.Union[str, None], t.Dict[exp.Table, set[str]]] = ( 1600 defaultdict(lambda: defaultdict(set)) 1601 ) 1602 snapshots_by_table_name: t.Dict[exp.Table, t.Dict[str, Snapshot]] = defaultdict(dict) 1603 for snapshot in target_snapshots: 1604 if not snapshot.is_model or snapshot.is_symbolic: 1605 continue 1606 table = table_name_callable(snapshot) 1607 table_schema = d.schema_(table.db, catalog=table.catalog) 1608 tables_by_gateway_and_schema[snapshot.model_gateway][table_schema].add(table.name) 1609 snapshots_by_table_name[table_schema][table.name] = snapshot 1610 1611 def _get_data_objects_in_schema( 1612 schema: exp.Table, 1613 object_names: t.Optional[t.Set[str]] = None, 1614 gateway: t.Optional[str] = None, 1615 ) -> t.List[DataObject]: 1616 logger.info("Listing data objects in schema %s", schema.sql()) 1617 return self.get_adapter(gateway).get_data_objects( 1618 schema, object_names, safe_to_cache=True 1619 ) 1620 1621 with self.concurrent_context(): 1622 snapshot_id_to_obj: t.Dict[SnapshotId, DataObject] = {} 1623 # A schema can be shared across multiple engines, so we need to group tables by both gateway and schema 1624 for gateway, tables_by_schema in tables_by_gateway_and_schema.items(): 1625 schema_list = list(tables_by_schema.keys()) 1626 results = concurrent_apply_to_values( 1627 schema_list, 1628 lambda s: _get_data_objects_in_schema( 1629 schema=s, object_names=tables_by_schema.get(s), gateway=gateway 1630 ), 1631 self.ddl_concurrent_tasks, 1632 ) 1633 1634 for schema, objs in zip(schema_list, results): 1635 snapshots_by_name = snapshots_by_table_name.get(schema, {}) 1636 for obj in objs: 1637 if obj.name in snapshots_by_name: 1638 snapshot_id_to_obj[snapshots_by_name[obj.name].snapshot_id] = obj 1639 1640 return snapshot_id_to_obj 1641 1642 1643def _evaluation_strategy(snapshot: SnapshotInfoLike, adapter: EngineAdapter) -> EvaluationStrategy: 1644 klass: t.Type 1645 if snapshot.is_embedded: 1646 klass = EmbeddedStrategy 1647 elif snapshot.is_symbolic or snapshot.is_audit: 1648 klass = SymbolicStrategy 1649 elif snapshot.is_full: 1650 klass = FullRefreshStrategy 1651 elif snapshot.is_seed: 1652 klass = SeedStrategy 1653 elif snapshot.is_incremental_by_time_range: 1654 klass = IncrementalByTimeRangeStrategy 1655 elif snapshot.is_incremental_by_unique_key: 1656 klass = IncrementalByUniqueKeyStrategy 1657 elif snapshot.is_incremental_by_partition: 1658 klass = IncrementalByPartitionStrategy 1659 elif snapshot.is_incremental_unmanaged: 1660 klass = IncrementalUnmanagedStrategy 1661 elif snapshot.is_view: 1662 klass = ViewStrategy 1663 elif snapshot.is_scd_type_2: 1664 klass = SCDType2Strategy 1665 elif snapshot.is_dbt_custom: 1666 if hasattr(snapshot, "model") and isinstance( 1667 (model_kind := snapshot.model.kind), DbtCustomKind 1668 ): 1669 return DbtCustomMaterializationStrategy( 1670 adapter=adapter, 1671 materialization_name=model_kind.materialization, 1672 materialization_template=model_kind.definition, 1673 ) 1674 1675 raise SQLMeshError( 1676 f"Expected DbtCustomKind for dbt custom materialization in model '{snapshot.name}'" 1677 ) 1678 elif snapshot.is_custom: 1679 if snapshot.custom_materialization is None: 1680 raise SQLMeshError( 1681 f"Missing the name of a custom evaluation strategy in model '{snapshot.name}'." 1682 ) 1683 _, klass = get_custom_materialization_type_or_raise(snapshot.custom_materialization) 1684 return klass(adapter) 1685 elif snapshot.is_managed: 1686 klass = EngineManagedStrategy 1687 else: 1688 raise SQLMeshError(f"Unexpected snapshot: {snapshot}") 1689 1690 return klass(adapter) 1691 1692 1693class EvaluationStrategy(abc.ABC): 1694 def __init__(self, adapter: EngineAdapter): 1695 self.adapter = adapter 1696 1697 @abc.abstractmethod 1698 def insert( 1699 self, 1700 table_name: str, 1701 query_or_df: QueryOrDF, 1702 model: Model, 1703 is_first_insert: bool, 1704 render_kwargs: t.Dict[str, t.Any], 1705 **kwargs: t.Any, 1706 ) -> None: 1707 """Inserts the given query or a DataFrame into the target table or a view. 1708 1709 Args: 1710 table_name: The name of the target table or view. 1711 query_or_df: A query or a DataFrame to insert. 1712 model: The target model. 1713 is_first_insert: Whether this is the first insert for this version of a model. This value is set to True 1714 if no data has been previously inserted into the target table, or when the entire history of the target model has 1715 been restated. Note that in the latter case, the table might contain data from previous executions, and it is the 1716 responsibility of a specific evaluation strategy to handle the truncation of the table if necessary. 1717 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1718 """ 1719 1720 @abc.abstractmethod 1721 def append( 1722 self, 1723 table_name: str, 1724 query_or_df: QueryOrDF, 1725 model: Model, 1726 render_kwargs: t.Dict[str, t.Any], 1727 **kwargs: t.Any, 1728 ) -> None: 1729 """Appends the given query or a DataFrame to the existing table. 1730 1731 Args: 1732 table_name: The target table name. 1733 query_or_df: A query or a DataFrame to insert. 1734 model: The target model. 1735 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1736 """ 1737 1738 @abc.abstractmethod 1739 def create( 1740 self, 1741 table_name: str, 1742 model: Model, 1743 is_table_deployable: bool, 1744 render_kwargs: t.Dict[str, t.Any], 1745 skip_grants: bool, 1746 **kwargs: t.Any, 1747 ) -> None: 1748 """Creates the target table or view. 1749 1750 Note that the intention here is to just create the table structure, data is loaded in insert() and append() 1751 1752 Args: 1753 table_name: The name of a table or a view. 1754 model: The target model. 1755 is_table_deployable: True if this creation request is for the "main" table that *might* be deployed to a production environment. 1756 False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex 1757 which determines if the snapshot is deployable to production or not 1758 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1759 """ 1760 1761 @abc.abstractmethod 1762 def migrate( 1763 self, 1764 target_table_name: str, 1765 source_table_name: str, 1766 snapshot: Snapshot, 1767 *, 1768 ignore_destructive: bool, 1769 ignore_additive: bool, 1770 **kwargs: t.Any, 1771 ) -> None: 1772 """Migrates the target table schema so that it corresponds to the source table schema. 1773 1774 Args: 1775 target_table_name: The target table name. 1776 source_table_name: The source table name. 1777 snapshot: The target snapshot. 1778 ignore_destructive: If True, destructive changes are not created when migrating. 1779 This is used for forward-only models that are being migrated to a new version. 1780 ignore_additive: If True, additive changes are not created when migrating. 1781 This is used for forward-only models that are being migrated to a new version. 1782 """ 1783 1784 @abc.abstractmethod 1785 def delete(self, name: str, **kwargs: t.Any) -> None: 1786 """Deletes a target table or a view. 1787 1788 Args: 1789 name: The name of a table or a view. 1790 """ 1791 1792 @abc.abstractmethod 1793 def promote( 1794 self, 1795 table_name: str, 1796 view_name: str, 1797 model: Model, 1798 environment: str, 1799 **kwargs: t.Any, 1800 ) -> None: 1801 """Updates the target view to point to the target table. 1802 1803 Args: 1804 table_name: The name of a table in the physical layer that is being promoted. 1805 view_name: The name of the target view in the virtual layer. 1806 model: The model that is being promoted. 1807 environment: The name of the target environment. 1808 """ 1809 1810 @abc.abstractmethod 1811 def demote(self, view_name: str, **kwargs: t.Any) -> None: 1812 """Deletes the target view in the virtual layer. 1813 1814 Args: 1815 view_name: The name of the target view in the virtual layer. 1816 """ 1817 1818 @abc.abstractmethod 1819 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 1820 """Executes the snapshot's pre statements. 1821 1822 Args: 1823 snapshot: The target snapshot. 1824 render_kwargs: Additional key-value arguments to pass when rendering the statements. 1825 """ 1826 1827 @abc.abstractmethod 1828 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 1829 """Executes the snapshot's post statements. 1830 1831 Args: 1832 snapshot: The target snapshot. 1833 render_kwargs: Additional key-value arguments to pass when rendering the statements. 1834 """ 1835 1836 def _apply_grants( 1837 self, 1838 model: Model, 1839 table_name: str, 1840 target_layer: GrantsTargetLayer, 1841 is_snapshot_deployable: bool = False, 1842 ) -> None: 1843 """Apply grants for a model if grants are configured. 1844 1845 This method provides consistent grants application across all evaluation strategies. 1846 It ensures that whenever a physical database object (table, view, materialized view) 1847 is created or modified, the appropriate grants are applied. 1848 1849 Args: 1850 model: The SQLMesh model containing grants configuration 1851 table_name: The target table/view name to apply grants to 1852 target_layer: The grants application layer (physical or virtual) 1853 is_snapshot_deployable: Whether the snapshot is deployable (targeting production) 1854 """ 1855 grants_config = model.grants 1856 if grants_config is None: 1857 return 1858 1859 if not self.adapter.SUPPORTS_GRANTS: 1860 logger.warning( 1861 f"Engine {self.adapter.__class__.__name__} does not support grants. " 1862 f"Skipping grants application for model {model.name}" 1863 ) 1864 return 1865 1866 model_grants_target_layer = model.grants_target_layer 1867 deployable_vde_dev_only = ( 1868 is_snapshot_deployable and model.virtual_environment_mode.is_dev_only 1869 ) 1870 1871 # table_type is always a VIEW in the virtual layer unless model is deployable and VDE is dev_only 1872 # in which case we fall back to the model's model_grants_table_type 1873 if target_layer == GrantsTargetLayer.VIRTUAL and not deployable_vde_dev_only: 1874 model_grants_table_type = DataObjectType.VIEW 1875 else: 1876 model_grants_table_type = model.grants_table_type 1877 1878 if ( 1879 model_grants_target_layer.is_all 1880 or model_grants_target_layer == target_layer 1881 # Always apply grants in production when VDE is dev_only regardless of target_layer 1882 # since only physical tables are created in production 1883 or deployable_vde_dev_only 1884 ): 1885 logger.info(f"Applying grants for model {model.name} to table {table_name}") 1886 self.adapter.sync_grants_config( 1887 exp.to_table(table_name, dialect=self.adapter.dialect), 1888 grants_config, 1889 model_grants_table_type, 1890 ) 1891 else: 1892 logger.debug( 1893 f"Skipping grants application for model {model.name} in {target_layer} layer" 1894 ) 1895 1896 1897class SymbolicStrategy(EvaluationStrategy): 1898 def insert( 1899 self, 1900 table_name: str, 1901 query_or_df: QueryOrDF, 1902 model: Model, 1903 is_first_insert: bool, 1904 render_kwargs: t.Dict[str, t.Any], 1905 **kwargs: t.Any, 1906 ) -> None: 1907 pass 1908 1909 def append( 1910 self, 1911 table_name: str, 1912 query_or_df: QueryOrDF, 1913 model: Model, 1914 render_kwargs: t.Dict[str, t.Any], 1915 **kwargs: t.Any, 1916 ) -> None: 1917 pass 1918 1919 def create( 1920 self, 1921 table_name: str, 1922 model: Model, 1923 is_table_deployable: bool, 1924 render_kwargs: t.Dict[str, t.Any], 1925 skip_grants: bool, 1926 **kwargs: t.Any, 1927 ) -> None: 1928 pass 1929 1930 def migrate( 1931 self, 1932 target_table_name: str, 1933 source_table_name: str, 1934 snapshot: Snapshot, 1935 *, 1936 ignore_destructive: bool, 1937 ignore_additive: bool, 1938 **kwarg: t.Any, 1939 ) -> None: 1940 pass 1941 1942 def delete(self, name: str, **kwargs: t.Any) -> None: 1943 pass 1944 1945 def promote( 1946 self, 1947 table_name: str, 1948 view_name: str, 1949 model: Model, 1950 environment: str, 1951 **kwargs: t.Any, 1952 ) -> None: 1953 pass 1954 1955 def demote(self, view_name: str, **kwargs: t.Any) -> None: 1956 pass 1957 1958 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: 1959 pass 1960 1961 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: 1962 pass 1963 1964 1965class EmbeddedStrategy(SymbolicStrategy): 1966 def promote( 1967 self, 1968 table_name: str, 1969 view_name: str, 1970 model: Model, 1971 environment: str, 1972 **kwargs: t.Any, 1973 ) -> None: 1974 logger.info("Dropping view '%s' for non-materialized table", view_name) 1975 self.adapter.drop_view(view_name, cascade=False) 1976 1977 1978class PromotableStrategy(EvaluationStrategy, abc.ABC): 1979 def promote( 1980 self, 1981 table_name: str, 1982 view_name: str, 1983 model: Model, 1984 environment: str, 1985 **kwargs: t.Any, 1986 ) -> None: 1987 is_prod = environment == c.PROD 1988 logger.info("Updating view '%s' to point at table '%s'", view_name, table_name) 1989 render_kwargs: t.Dict[str, t.Any] = dict( 1990 start=kwargs.get("start"), 1991 end=kwargs.get("end"), 1992 execution_time=kwargs.get("execution_time"), 1993 engine_adapter=kwargs.get("engine_adapter"), 1994 snapshots=kwargs.get("snapshots"), 1995 deployability_index=kwargs.get("deployability_index"), 1996 table_mapping=kwargs.get("table_mapping"), 1997 runtime_stage=kwargs.get("runtime_stage"), 1998 ) 1999 self.adapter.create_view( 2000 view_name, 2001 exp.select("*").from_(table_name, dialect=self.adapter.dialect), 2002 table_description=model.description if is_prod else None, 2003 column_descriptions=model.column_descriptions if is_prod else None, 2004 view_properties=model.render_virtual_properties(**render_kwargs), 2005 ) 2006 2007 snapshot = kwargs.get("snapshot") 2008 deployability_index = kwargs.get("deployability_index") 2009 is_snapshot_deployable = ( 2010 deployability_index.is_deployable(snapshot) 2011 if snapshot and deployability_index 2012 else False 2013 ) 2014 2015 # Apply grants to the virtual layer (view) after promotion 2016 self._apply_grants(model, view_name, GrantsTargetLayer.VIRTUAL, is_snapshot_deployable) 2017 2018 def demote(self, view_name: str, **kwargs: t.Any) -> None: 2019 logger.info("Dropping view '%s'", view_name) 2020 self.adapter.drop_view(view_name, cascade=False) 2021 2022 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 2023 self.adapter.execute(snapshot.model.render_pre_statements(**render_kwargs)) 2024 2025 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 2026 self.adapter.execute(snapshot.model.render_post_statements(**render_kwargs)) 2027 2028 2029class MaterializableStrategy(PromotableStrategy, abc.ABC): 2030 def create( 2031 self, 2032 table_name: str, 2033 model: Model, 2034 is_table_deployable: bool, 2035 render_kwargs: t.Dict[str, t.Any], 2036 skip_grants: bool, 2037 **kwargs: t.Any, 2038 ) -> None: 2039 ctas_query = model.ctas_query(**render_kwargs) 2040 physical_properties = kwargs.get("physical_properties", model.physical_properties) 2041 2042 logger.info("Creating table '%s'", table_name) 2043 if model.annotated: 2044 self.adapter.create_table( 2045 table_name, 2046 target_columns_to_types=model.columns_to_types_or_raise, 2047 table_format=model.table_format, 2048 storage_format=model.storage_format, 2049 partitioned_by=model.partitioned_by, 2050 partition_interval_unit=model.partition_interval_unit, 2051 clustered_by=model.clustered_by, 2052 table_properties=physical_properties, 2053 table_description=model.description if is_table_deployable else None, 2054 column_descriptions=model.column_descriptions if is_table_deployable else None, 2055 ) 2056 2057 # If we create both temp and prod tables, we need to make sure that we dry run once. 2058 dry_run = kwargs.get("dry_run", True) or not is_table_deployable 2059 2060 # Only sql models have queries that can be tested. 2061 # We also need to make sure that we don't dry run on Redshift because its planner / optimizer sometimes 2062 # breaks on our CTAS queries due to us relying on the WHERE FALSE LIMIT 0 combo. 2063 if model.is_sql and dry_run and self.adapter.dialect != "redshift": 2064 logger.info("Dry running model '%s'", model.name) 2065 self.adapter.fetchall(ctas_query) 2066 else: 2067 self.adapter.ctas( 2068 table_name, 2069 ctas_query, 2070 model.columns_to_types, 2071 table_format=model.table_format, 2072 storage_format=model.storage_format, 2073 partitioned_by=model.partitioned_by, 2074 partition_interval_unit=model.partition_interval_unit, 2075 clustered_by=model.clustered_by, 2076 table_properties=physical_properties, 2077 table_description=model.description if is_table_deployable else None, 2078 column_descriptions=model.column_descriptions if is_table_deployable else None, 2079 ) 2080 2081 # Apply grants after table creation (unless explicitly skipped by caller) 2082 if not skip_grants: 2083 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2084 self._apply_grants( 2085 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2086 ) 2087 2088 def migrate( 2089 self, 2090 target_table_name: str, 2091 source_table_name: str, 2092 snapshot: Snapshot, 2093 *, 2094 ignore_destructive: bool, 2095 ignore_additive: bool, 2096 **kwargs: t.Any, 2097 ) -> None: 2098 logger.info(f"Altering table '{target_table_name}'") 2099 alter_operations = self.adapter.get_alter_operations( 2100 target_table_name, 2101 source_table_name, 2102 ignore_destructive=ignore_destructive, 2103 ignore_additive=ignore_additive, 2104 ) 2105 _check_destructive_schema_change( 2106 snapshot, alter_operations, kwargs["allow_destructive_snapshots"] 2107 ) 2108 _check_additive_schema_change( 2109 snapshot, alter_operations, kwargs["allow_additive_snapshots"] 2110 ) 2111 self.adapter.alter_table(alter_operations) 2112 2113 # Apply grants after schema migration 2114 deployability_index = kwargs.get("deployability_index") 2115 is_snapshot_deployable = ( 2116 deployability_index.is_deployable(snapshot) if deployability_index else False 2117 ) 2118 self._apply_grants( 2119 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2120 ) 2121 2122 def delete(self, name: str, **kwargs: t.Any) -> None: 2123 _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 2124 self.adapter.drop_table(name, cascade=kwargs.pop("cascade", False)) 2125 logger.info("Dropped table '%s'", name) 2126 2127 def _replace_query_for_model( 2128 self, 2129 model: Model, 2130 name: str, 2131 query_or_df: QueryOrDF, 2132 render_kwargs: t.Dict[str, t.Any], 2133 skip_grants: bool = False, 2134 **kwargs: t.Any, 2135 ) -> None: 2136 """Replaces the table for the given model. 2137 2138 Args: 2139 model: The target model. 2140 name: The name of the target table. 2141 query_or_df: The query or DataFrame to replace the target table with. 2142 """ 2143 if (model.is_seed or model.kind.is_full) and model.annotated: 2144 columns_to_types = model.columns_to_types_or_raise 2145 source_columns: t.Optional[t.List[str]] = list(columns_to_types) 2146 else: 2147 try: 2148 # Source columns from the underlying table to prevent unintentional table schema changes during restatement of incremental models. 2149 columns_to_types, source_columns = self._get_target_and_source_columns( 2150 model, name, render_kwargs, force_get_columns_from_target=True 2151 ) 2152 except Exception: 2153 columns_to_types, source_columns = None, None 2154 2155 self.adapter.replace_query( 2156 name, 2157 query_or_df, 2158 table_format=model.table_format, 2159 storage_format=model.storage_format, 2160 partitioned_by=model.partitioned_by, 2161 partition_interval_unit=model.partition_interval_unit, 2162 clustered_by=model.clustered_by, 2163 table_properties=kwargs.get("physical_properties", model.physical_properties), 2164 table_description=model.description, 2165 column_descriptions=model.column_descriptions, 2166 target_columns_to_types=columns_to_types, 2167 source_columns=source_columns, 2168 ) 2169 2170 # Apply grants after table replacement (unless explicitly skipped by caller) 2171 if not skip_grants: 2172 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2173 self._apply_grants(model, name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) 2174 2175 def _get_target_and_source_columns( 2176 self, 2177 model: Model, 2178 table_name: str, 2179 render_kwargs: t.Dict[str, t.Any], 2180 force_get_columns_from_target: bool = False, 2181 ) -> t.Tuple[t.Dict[str, exp.DataType], t.Optional[t.List[str]]]: 2182 if force_get_columns_from_target: 2183 target_column_to_types = self.adapter.columns(table_name) 2184 else: 2185 target_column_to_types = ( 2186 model.columns_to_types # type: ignore 2187 if model.annotated 2188 and not model.on_destructive_change.is_ignore 2189 and not model.on_additive_change.is_ignore 2190 else self.adapter.columns(table_name) 2191 ) 2192 assert target_column_to_types is not None 2193 if model.on_destructive_change.is_ignore or model.on_additive_change.is_ignore: 2194 # We need to identify the columns that are only in the source so we create an empty table with 2195 # the user query to determine that 2196 temp_table_name = exp.table_( 2197 "diff", 2198 db=model.physical_schema, 2199 ) 2200 with self.adapter.temp_table( 2201 model.ctas_query(**render_kwargs), name=temp_table_name 2202 ) as temp_table: 2203 source_columns = list(self.adapter.columns(temp_table)) 2204 else: 2205 source_columns = None 2206 return target_column_to_types, source_columns 2207 2208 2209class IncrementalStrategy(MaterializableStrategy, abc.ABC): 2210 def append( 2211 self, 2212 table_name: str, 2213 query_or_df: QueryOrDF, 2214 model: Model, 2215 render_kwargs: t.Dict[str, t.Any], 2216 **kwargs: t.Any, 2217 ) -> None: 2218 columns_to_types, source_columns = self._get_target_and_source_columns( 2219 model, table_name, render_kwargs=render_kwargs 2220 ) 2221 self.adapter.insert_append( 2222 table_name, 2223 query_or_df, 2224 target_columns_to_types=columns_to_types, 2225 source_columns=source_columns, 2226 ) 2227 2228 2229class IncrementalByPartitionStrategy(IncrementalStrategy): 2230 def insert( 2231 self, 2232 table_name: str, 2233 query_or_df: QueryOrDF, 2234 model: Model, 2235 is_first_insert: bool, 2236 render_kwargs: t.Dict[str, t.Any], 2237 **kwargs: t.Any, 2238 ) -> None: 2239 if is_first_insert: 2240 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2241 else: 2242 columns_to_types, source_columns = self._get_target_and_source_columns( 2243 model, table_name, render_kwargs=render_kwargs 2244 ) 2245 self.adapter.insert_overwrite_by_partition( 2246 table_name, 2247 query_or_df, 2248 partitioned_by=model.partitioned_by, 2249 target_columns_to_types=columns_to_types, 2250 source_columns=source_columns, 2251 ) 2252 2253 2254class IncrementalByTimeRangeStrategy(IncrementalStrategy): 2255 def insert( 2256 self, 2257 table_name: str, 2258 query_or_df: QueryOrDF, 2259 model: Model, 2260 is_first_insert: bool, 2261 render_kwargs: t.Dict[str, t.Any], 2262 **kwargs: t.Any, 2263 ) -> None: 2264 assert model.time_column 2265 columns_to_types, source_columns = self._get_target_and_source_columns( 2266 model, table_name, render_kwargs=render_kwargs 2267 ) 2268 self.adapter.insert_overwrite_by_time_partition( 2269 table_name, 2270 query_or_df, 2271 time_formatter=model.convert_to_time_column, 2272 time_column=model.time_column, 2273 target_columns_to_types=columns_to_types, 2274 source_columns=source_columns, 2275 **kwargs, 2276 ) 2277 2278 2279class IncrementalByUniqueKeyStrategy(IncrementalStrategy): 2280 def insert( 2281 self, 2282 table_name: str, 2283 query_or_df: QueryOrDF, 2284 model: Model, 2285 is_first_insert: bool, 2286 render_kwargs: t.Dict[str, t.Any], 2287 **kwargs: t.Any, 2288 ) -> None: 2289 if is_first_insert: 2290 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2291 else: 2292 columns_to_types, source_columns = self._get_target_and_source_columns( 2293 model, 2294 table_name, 2295 render_kwargs=render_kwargs, 2296 ) 2297 self.adapter.merge( 2298 table_name, 2299 query_or_df, 2300 target_columns_to_types=columns_to_types, 2301 unique_key=model.unique_key, 2302 when_matched=model.when_matched, 2303 merge_filter=model.render_merge_filter( 2304 start=kwargs.get("start"), 2305 end=kwargs.get("end"), 2306 execution_time=kwargs.get("execution_time"), 2307 ), 2308 physical_properties=kwargs.get("physical_properties", model.physical_properties), 2309 source_columns=source_columns, 2310 ) 2311 2312 def append( 2313 self, 2314 table_name: str, 2315 query_or_df: QueryOrDF, 2316 model: Model, 2317 render_kwargs: t.Dict[str, t.Any], 2318 **kwargs: t.Any, 2319 ) -> None: 2320 columns_to_types, source_columns = self._get_target_and_source_columns( 2321 model, table_name, render_kwargs=render_kwargs 2322 ) 2323 self.adapter.merge( 2324 table_name, 2325 query_or_df, 2326 target_columns_to_types=columns_to_types, 2327 unique_key=model.unique_key, 2328 when_matched=model.when_matched, 2329 merge_filter=model.render_merge_filter( 2330 start=kwargs.get("start"), 2331 end=kwargs.get("end"), 2332 execution_time=kwargs.get("execution_time"), 2333 ), 2334 physical_properties=kwargs.get("physical_properties", model.physical_properties), 2335 source_columns=source_columns, 2336 ) 2337 2338 2339class IncrementalUnmanagedStrategy(IncrementalStrategy): 2340 def append( 2341 self, 2342 table_name: str, 2343 query_or_df: QueryOrDF, 2344 model: Model, 2345 render_kwargs: t.Dict[str, t.Any], 2346 **kwargs: t.Any, 2347 ) -> None: 2348 columns_to_types, source_columns = self._get_target_and_source_columns( 2349 model, table_name, render_kwargs=render_kwargs 2350 ) 2351 self.adapter.insert_append( 2352 table_name, 2353 query_or_df, 2354 target_columns_to_types=columns_to_types, 2355 source_columns=source_columns, 2356 ) 2357 2358 def insert( 2359 self, 2360 table_name: str, 2361 query_or_df: QueryOrDF, 2362 model: Model, 2363 is_first_insert: bool, 2364 render_kwargs: t.Dict[str, t.Any], 2365 **kwargs: t.Any, 2366 ) -> None: 2367 if is_first_insert: 2368 return self._replace_query_for_model( 2369 model, table_name, query_or_df, render_kwargs, **kwargs 2370 ) 2371 if isinstance(model.kind, IncrementalUnmanagedKind) and model.kind.insert_overwrite: 2372 columns_to_types, source_columns = self._get_target_and_source_columns( 2373 model, 2374 table_name, 2375 render_kwargs=render_kwargs, 2376 ) 2377 2378 return self.adapter.insert_overwrite_by_partition( 2379 table_name, 2380 query_or_df, 2381 model.partitioned_by, 2382 target_columns_to_types=columns_to_types, 2383 source_columns=source_columns, 2384 ) 2385 return self.append( 2386 table_name, 2387 query_or_df, 2388 model, 2389 render_kwargs=render_kwargs, 2390 **kwargs, 2391 ) 2392 2393 2394class FullRefreshStrategy(MaterializableStrategy): 2395 def append( 2396 self, 2397 table_name: str, 2398 query_or_df: QueryOrDF, 2399 model: Model, 2400 render_kwargs: t.Dict[str, t.Any], 2401 **kwargs: t.Any, 2402 ) -> None: 2403 self.adapter.insert_append( 2404 table_name, 2405 query_or_df, 2406 target_columns_to_types=model.columns_to_types, 2407 ) 2408 2409 def insert( 2410 self, 2411 table_name: str, 2412 query_or_df: QueryOrDF, 2413 model: Model, 2414 is_first_insert: bool, 2415 render_kwargs: t.Dict[str, t.Any], 2416 **kwargs: t.Any, 2417 ) -> None: 2418 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2419 2420 2421class SeedStrategy(MaterializableStrategy): 2422 def create( 2423 self, 2424 table_name: str, 2425 model: Model, 2426 is_table_deployable: bool, 2427 render_kwargs: t.Dict[str, t.Any], 2428 skip_grants: bool, 2429 **kwargs: t.Any, 2430 ) -> None: 2431 model = t.cast(SeedModel, model) 2432 if not model.is_hydrated and self.adapter.table_exists(table_name): 2433 # This likely means that the table was created and populated previously, but the evaluation stage 2434 # failed before the interval could be added for this model. 2435 logger.warning( 2436 "Seed model '%s' is not hydrated, but the table '%s' exists. Skipping creation", 2437 model.name, 2438 table_name, 2439 ) 2440 return 2441 2442 super().create( 2443 table_name, 2444 model, 2445 is_table_deployable, 2446 render_kwargs, 2447 skip_grants=True, # Skip grants; they're applied after data insertion 2448 **kwargs, 2449 ) 2450 # For seeds we insert data at the time of table creation. 2451 try: 2452 for index, df in enumerate(model.render_seed()): 2453 if index == 0: 2454 self._replace_query_for_model( 2455 model, 2456 table_name, 2457 df, 2458 render_kwargs, 2459 skip_grants=True, # Skip grants; they're applied after data insertion 2460 **kwargs, 2461 ) 2462 else: 2463 self.adapter.insert_append( 2464 table_name, df, target_columns_to_types=model.columns_to_types 2465 ) 2466 2467 if not skip_grants: 2468 # Apply grants after seed table creation and data insertion 2469 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2470 self._apply_grants( 2471 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2472 ) 2473 except Exception: 2474 self.adapter.drop_table(table_name) 2475 raise 2476 2477 def migrate( 2478 self, 2479 target_table_name: str, 2480 source_table_name: str, 2481 snapshot: Snapshot, 2482 *, 2483 ignore_destructive: bool, 2484 ignore_additive: bool, 2485 **kwargs: t.Any, 2486 ) -> None: 2487 raise NotImplementedError("Seeds do not support migrations.") 2488 2489 def insert( 2490 self, 2491 table_name: str, 2492 query_or_df: QueryOrDF, 2493 model: Model, 2494 is_first_insert: bool, 2495 render_kwargs: t.Dict[str, t.Any], 2496 **kwargs: t.Any, 2497 ) -> None: 2498 # Data has already been inserted at the time of table creation. 2499 pass 2500 2501 def append( 2502 self, 2503 table_name: str, 2504 query_or_df: QueryOrDF, 2505 model: Model, 2506 render_kwargs: t.Dict[str, t.Any], 2507 **kwargs: t.Any, 2508 ) -> None: 2509 # Data has already been inserted at the time of table creation. 2510 pass 2511 2512 2513class SCDType2Strategy(IncrementalStrategy): 2514 def create( 2515 self, 2516 table_name: str, 2517 model: Model, 2518 is_table_deployable: bool, 2519 render_kwargs: t.Dict[str, t.Any], 2520 skip_grants: bool, 2521 **kwargs: t.Any, 2522 ) -> None: 2523 assert isinstance(model.kind, (SCDType2ByTimeKind, SCDType2ByColumnKind)) 2524 if model.annotated: 2525 logger.info("Creating table '%s'", table_name) 2526 columns_to_types = model.columns_to_types_or_raise 2527 if isinstance(model.kind, SCDType2ByTimeKind): 2528 columns_to_types[model.kind.updated_at_name.name] = model.kind.time_data_type 2529 self.adapter.create_table( 2530 table_name, 2531 target_columns_to_types=columns_to_types, 2532 table_format=model.table_format, 2533 storage_format=model.storage_format, 2534 partitioned_by=model.partitioned_by, 2535 partition_interval_unit=model.partition_interval_unit, 2536 clustered_by=model.clustered_by, 2537 table_properties=kwargs.get("physical_properties", model.physical_properties), 2538 table_description=model.description if is_table_deployable else None, 2539 column_descriptions=model.column_descriptions if is_table_deployable else None, 2540 ) 2541 else: 2542 # We assume that the data type for `updated_at_name` matches the data type that is defined for 2543 # `time_data_type`. If that isn't the case, then the user might get an error about not being able 2544 # to do comparisons across different data types 2545 super().create( 2546 table_name, 2547 model, 2548 is_table_deployable, 2549 render_kwargs, 2550 skip_grants, 2551 **kwargs, 2552 ) 2553 2554 if not skip_grants: 2555 # Apply grants after SCD Type 2 table creation 2556 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2557 self._apply_grants( 2558 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2559 ) 2560 2561 def insert( 2562 self, 2563 table_name: str, 2564 query_or_df: QueryOrDF, 2565 model: Model, 2566 is_first_insert: bool, 2567 render_kwargs: t.Dict[str, t.Any], 2568 **kwargs: t.Any, 2569 ) -> None: 2570 # Source columns from the underlying table to prevent unintentional table schema changes during restatement of incremental models. 2571 columns_to_types, source_columns = self._get_target_and_source_columns( 2572 model, 2573 table_name, 2574 render_kwargs=render_kwargs, 2575 force_get_columns_from_target=True, 2576 ) 2577 if isinstance(model.kind, SCDType2ByTimeKind): 2578 self.adapter.scd_type_2_by_time( 2579 target_table=table_name, 2580 source_table=query_or_df, 2581 unique_key=model.unique_key, 2582 valid_from_col=model.kind.valid_from_name, 2583 valid_to_col=model.kind.valid_to_name, 2584 execution_time=kwargs["execution_time"], 2585 updated_at_col=model.kind.updated_at_name, 2586 invalidate_hard_deletes=model.kind.invalidate_hard_deletes, 2587 updated_at_as_valid_from=model.kind.updated_at_as_valid_from, 2588 target_columns_to_types=columns_to_types, 2589 table_format=model.table_format, 2590 table_description=model.description, 2591 column_descriptions=model.column_descriptions, 2592 truncate=is_first_insert, 2593 source_columns=source_columns, 2594 storage_format=model.storage_format, 2595 partitioned_by=model.partitioned_by, 2596 partition_interval_unit=model.partition_interval_unit, 2597 clustered_by=model.clustered_by, 2598 table_properties=kwargs.get("physical_properties", model.physical_properties), 2599 ) 2600 elif isinstance(model.kind, SCDType2ByColumnKind): 2601 self.adapter.scd_type_2_by_column( 2602 target_table=table_name, 2603 source_table=query_or_df, 2604 unique_key=model.unique_key, 2605 valid_from_col=model.kind.valid_from_name, 2606 valid_to_col=model.kind.valid_to_name, 2607 execution_time=model.kind.updated_at_name or kwargs["execution_time"], 2608 check_columns=model.kind.columns, 2609 invalidate_hard_deletes=model.kind.invalidate_hard_deletes, 2610 execution_time_as_valid_from=model.kind.execution_time_as_valid_from, 2611 target_columns_to_types=columns_to_types, 2612 table_format=model.table_format, 2613 table_description=model.description, 2614 column_descriptions=model.column_descriptions, 2615 truncate=is_first_insert, 2616 source_columns=source_columns, 2617 storage_format=model.storage_format, 2618 partitioned_by=model.partitioned_by, 2619 partition_interval_unit=model.partition_interval_unit, 2620 clustered_by=model.clustered_by, 2621 table_properties=kwargs.get("physical_properties", model.physical_properties), 2622 ) 2623 else: 2624 raise SQLMeshError( 2625 f"Unexpected SCD Type 2 kind: {model.kind}. This is not expected and please report this as a bug." 2626 ) 2627 2628 # Apply grants after SCD Type 2 table recreation 2629 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2630 self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) 2631 2632 def append( 2633 self, 2634 table_name: str, 2635 query_or_df: QueryOrDF, 2636 model: Model, 2637 render_kwargs: t.Dict[str, t.Any], 2638 **kwargs: t.Any, 2639 ) -> None: 2640 return self.insert( 2641 table_name, 2642 query_or_df, 2643 model, 2644 is_first_insert=False, 2645 render_kwargs=render_kwargs, 2646 **kwargs, 2647 ) 2648 2649 2650class ViewStrategy(PromotableStrategy): 2651 def insert( 2652 self, 2653 table_name: str, 2654 query_or_df: QueryOrDF, 2655 model: Model, 2656 is_first_insert: bool, 2657 render_kwargs: t.Dict[str, t.Any], 2658 **kwargs: t.Any, 2659 ) -> None: 2660 # We should recreate MVs across supported engines (Snowflake, BigQuery etc) because 2661 # if upstream tables were recreated (e.g FULL models), the MVs would be silently invalidated. 2662 # The only exception to that rule is RisingWave which doesn't support CREATE OR REPLACE, so upstream 2663 # models don't recreate their physical tables for the MVs to be invalidated. 2664 # However, even for RW we still want to recreate MVs to avoid stale references, as is the case with normal views. 2665 # The flag is_first_insert is used for that matter as a signal to recreate the MV if the snapshot's intervals 2666 # have been cleared by `should_force_rebuild` 2667 is_materialized_view = self._is_materialized_view(model) 2668 must_recreate_view = not self.adapter.HAS_VIEW_BINDING or ( 2669 is_materialized_view and is_first_insert 2670 ) 2671 2672 if self.adapter.table_exists(table_name) and not must_recreate_view: 2673 logger.info("Skipping creation of the view '%s'", table_name) 2674 return 2675 2676 logger.info("Replacing view '%s'", table_name) 2677 self.adapter.create_view( 2678 table_name, 2679 query_or_df, 2680 model.columns_to_types, 2681 replace=must_recreate_view, 2682 materialized=is_materialized_view, 2683 view_properties=kwargs.get("physical_properties", model.physical_properties), 2684 table_description=model.description, 2685 column_descriptions=model.column_descriptions, 2686 ) 2687 2688 # Apply grants after view creation / replacement 2689 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2690 self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) 2691 2692 def append( 2693 self, 2694 table_name: str, 2695 query_or_df: QueryOrDF, 2696 model: Model, 2697 render_kwargs: t.Dict[str, t.Any], 2698 **kwargs: t.Any, 2699 ) -> None: 2700 raise ConfigError(f"Cannot append to a view '{table_name}'.") 2701 2702 def create( 2703 self, 2704 table_name: str, 2705 model: Model, 2706 is_table_deployable: bool, 2707 render_kwargs: t.Dict[str, t.Any], 2708 skip_grants: bool, 2709 **kwargs: t.Any, 2710 ) -> None: 2711 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2712 2713 if self.adapter.table_exists(table_name): 2714 # Make sure we don't recreate the view to prevent deletion of downstream views in engines with no late 2715 # binding support (because of DROP CASCADE). 2716 logger.info("View '%s' already exists", table_name) 2717 2718 if not skip_grants: 2719 # Always apply grants when present, even if view exists, to handle grants updates 2720 self._apply_grants( 2721 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2722 ) 2723 return 2724 2725 logger.info("Creating view '%s'", table_name) 2726 materialized = self._is_materialized_view(model) 2727 materialized_properties = None 2728 if materialized: 2729 materialized_properties = { 2730 "partitioned_by": model.partitioned_by, 2731 "clustered_by": model.clustered_by, 2732 "partition_interval_unit": model.partition_interval_unit, 2733 } 2734 self.adapter.create_view( 2735 table_name, 2736 model.render_query_or_raise(**render_kwargs), 2737 # Make sure we never replace the view during creation to avoid race conditions in engines with no late binding support. 2738 replace=False, 2739 materialized=self._is_materialized_view(model), 2740 materialized_properties=materialized_properties, 2741 view_properties=kwargs.get("physical_properties", model.physical_properties), 2742 table_description=model.description if is_table_deployable else None, 2743 column_descriptions=model.column_descriptions if is_table_deployable else None, 2744 ) 2745 2746 if not skip_grants: 2747 # Apply grants after view creation 2748 self._apply_grants( 2749 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2750 ) 2751 2752 def migrate( 2753 self, 2754 target_table_name: str, 2755 source_table_name: str, 2756 snapshot: Snapshot, 2757 *, 2758 ignore_destructive: bool, 2759 ignore_additive: bool, 2760 **kwargs: t.Any, 2761 ) -> None: 2762 logger.info("Migrating view '%s'", target_table_name) 2763 model = snapshot.model 2764 render_kwargs = dict( 2765 execution_time=now(), snapshots=kwargs["snapshots"], engine_adapter=self.adapter 2766 ) 2767 2768 self.adapter.create_view( 2769 target_table_name, 2770 model.render_query_or_raise(**render_kwargs), 2771 model.columns_to_types, 2772 materialized=self._is_materialized_view(model), 2773 view_properties=model.render_physical_properties(**render_kwargs), 2774 table_description=model.description, 2775 column_descriptions=model.column_descriptions, 2776 ) 2777 2778 # Apply grants after view migration 2779 deployability_index = kwargs.get("deployability_index") 2780 is_snapshot_deployable = ( 2781 deployability_index.is_deployable(snapshot) if deployability_index else False 2782 ) 2783 self._apply_grants( 2784 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2785 ) 2786 2787 def delete(self, name: str, **kwargs: t.Any) -> None: 2788 cascade = kwargs.pop("cascade", False) 2789 try: 2790 # Some engines (e.g., RisingWave) don’t fail when dropping a materialized view with a DROP VIEW statement, 2791 # because views and materialized views don’t share the same namespace. Therefore, we should not ignore if the 2792 # view doesn't exist and let the exception handler attempt to drop the materialized view. 2793 self.adapter.drop_view(name, cascade=cascade, ignore_if_not_exists=False) 2794 except Exception: 2795 logger.debug( 2796 "Failed to drop view '%s'. Trying to drop the materialized view instead", 2797 name, 2798 exc_info=True, 2799 ) 2800 self.adapter.drop_view( 2801 name, materialized=True, cascade=cascade, ignore_if_not_exists=True 2802 ) 2803 logger.info("Dropped view '%s'", name) 2804 2805 def _is_materialized_view(self, model: Model) -> bool: 2806 return isinstance(model.kind, ViewKind) and model.kind.materialized 2807 2808 2809C = t.TypeVar("C", bound=CustomKind) 2810 2811 2812class CustomMaterialization(IncrementalStrategy, t.Generic[C]): 2813 """Base class for custom materializations.""" 2814 2815 def insert( 2816 self, 2817 table_name: str, 2818 query_or_df: QueryOrDF, 2819 model: Model, 2820 is_first_insert: bool, 2821 render_kwargs: t.Dict[str, t.Any], 2822 **kwargs: t.Any, 2823 ) -> None: 2824 """Inserts the given query or a DataFrame into the target table or a view. 2825 2826 Args: 2827 table_name: The name of the target table or view. 2828 query_or_df: A query or a DataFrame to insert. 2829 model: The target model. 2830 is_first_insert: Whether this is the first insert for this version of a model. This value is set to True 2831 if no data has been previously inserted into the target table, or when the entire history of the target model has 2832 been restated. Note that in the latter case, the table might contain data from previous executions, and it is the 2833 responsibility of a specific evaluation strategy to handle the truncation of the table if necessary. 2834 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 2835 """ 2836 raise NotImplementedError( 2837 "Custom materialization strategies must implement the 'insert' method." 2838 ) 2839 2840 2841_custom_materialization_type_cache: t.Optional[ 2842 t.Dict[str, t.Tuple[t.Type[CustomKind], t.Type[CustomMaterialization]]] 2843] = None 2844 2845 2846def get_custom_materialization_kind_type(st: t.Type[CustomMaterialization]) -> t.Type[CustomKind]: 2847 # try to read if there is a custom 'kind' type in use by inspecting the type signature 2848 # eg try to read 'MyCustomKind' from: 2849 # >>>> class MyCustomMaterialization(CustomMaterialization[MyCustomKind]) 2850 # and fall back to base CustomKind if there is no generic type declared 2851 if hasattr(st, "__orig_bases__"): 2852 for base in st.__orig_bases__: 2853 if hasattr(base, "__origin__") and base.__origin__ == CustomMaterialization: 2854 for generic_arg in t.get_args(base): 2855 if not issubclass(generic_arg, CustomKind): 2856 raise SQLMeshError( 2857 f"Custom materialization kind '{generic_arg.__name__}' must be a subclass of CustomKind" 2858 ) 2859 2860 return generic_arg 2861 2862 return CustomKind 2863 2864 2865def get_custom_materialization_type( 2866 name: str, raise_errors: bool = True 2867) -> t.Optional[t.Tuple[t.Type[CustomKind], t.Type[CustomMaterialization]]]: 2868 global _custom_materialization_type_cache 2869 2870 strategy_key = name.lower() 2871 2872 try: 2873 if ( 2874 _custom_materialization_type_cache is None 2875 or strategy_key not in _custom_materialization_type_cache 2876 ): 2877 strategy_types = list(CustomMaterialization.__subclasses__()) 2878 2879 entry_points = metadata.entry_points(group="sqlmesh.materializations") 2880 for entry_point in entry_points: 2881 strategy_type = entry_point.load() 2882 if not issubclass(strategy_type, CustomMaterialization): 2883 raise SQLMeshError( 2884 f"Custom materialization entry point '{entry_point.name}' must be a subclass of CustomMaterialization." 2885 ) 2886 strategy_types.append(strategy_type) 2887 2888 _custom_materialization_type_cache = { 2889 getattr(strategy_type, "NAME", strategy_type.__name__).lower(): ( 2890 get_custom_materialization_kind_type(strategy_type), 2891 strategy_type, 2892 ) 2893 for strategy_type in strategy_types 2894 } 2895 2896 if strategy_key not in _custom_materialization_type_cache: 2897 raise ConfigError(f"Materialization strategy with name '{name}' was not found.") 2898 except (SQLMeshError, ConfigError) as e: 2899 if raise_errors: 2900 raise e 2901 2902 from sqlmesh.core.console import get_console 2903 2904 get_console().log_warning(str(e)) 2905 return None 2906 2907 strategy_kind_type, strategy_type = _custom_materialization_type_cache[strategy_key] 2908 logger.debug( 2909 "Resolved custom materialization '%s' to '%s' (%s)", name, strategy_type, strategy_kind_type 2910 ) 2911 2912 return strategy_kind_type, strategy_type 2913 2914 2915def get_custom_materialization_type_or_raise( 2916 name: str, 2917) -> t.Tuple[t.Type[CustomKind], t.Type[CustomMaterialization]]: 2918 types = get_custom_materialization_type(name, raise_errors=True) 2919 if types is not None: 2920 return types[0], types[1] 2921 2922 # Shouldnt get here as get_custom_materialization_type() has raise_errors=True, but just in case... 2923 raise SQLMeshError(f"Custom materialization '{name}' not present in the Python environment") 2924 2925 2926class DbtCustomMaterializationStrategy(MaterializableStrategy): 2927 def __init__( 2928 self, 2929 adapter: EngineAdapter, 2930 materialization_name: str, 2931 materialization_template: str, 2932 ): 2933 super().__init__(adapter) 2934 self.materialization_name = materialization_name 2935 self.materialization_template = materialization_template 2936 2937 def create( 2938 self, 2939 table_name: str, 2940 model: Model, 2941 is_table_deployable: bool, 2942 render_kwargs: t.Dict[str, t.Any], 2943 skip_grants: bool, 2944 **kwargs: t.Any, 2945 ) -> None: 2946 original_query = model.render_query_or_raise(**render_kwargs) 2947 self._execute_materialization( 2948 table_name=table_name, 2949 query_or_df=original_query.limit(0), 2950 model=model, 2951 is_first_insert=True, 2952 render_kwargs=render_kwargs, 2953 create_only=True, 2954 **kwargs, 2955 ) 2956 2957 # Apply grants after dbt custom materialization table creation 2958 if not skip_grants: 2959 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2960 self._apply_grants( 2961 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2962 ) 2963 2964 def insert( 2965 self, 2966 table_name: str, 2967 query_or_df: QueryOrDF, 2968 model: Model, 2969 is_first_insert: bool, 2970 render_kwargs: t.Dict[str, t.Any], 2971 **kwargs: t.Any, 2972 ) -> None: 2973 self._execute_materialization( 2974 table_name=table_name, 2975 query_or_df=query_or_df, 2976 model=model, 2977 is_first_insert=is_first_insert, 2978 render_kwargs=render_kwargs, 2979 **kwargs, 2980 ) 2981 2982 # Apply grants after custom materialization insert (only on first insert) 2983 if is_first_insert: 2984 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2985 self._apply_grants( 2986 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2987 ) 2988 2989 def append( 2990 self, 2991 table_name: str, 2992 query_or_df: QueryOrDF, 2993 model: Model, 2994 render_kwargs: t.Dict[str, t.Any], 2995 **kwargs: t.Any, 2996 ) -> None: 2997 return self.insert( 2998 table_name, 2999 query_or_df, 3000 model, 3001 is_first_insert=False, 3002 render_kwargs=render_kwargs, 3003 **kwargs, 3004 ) 3005 3006 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 3007 # in dbt custom materialisations it's up to the user to run the pre hooks inside the transaction 3008 if not render_kwargs.get("inside_transaction", True): 3009 super().run_pre_statements( 3010 snapshot=snapshot, 3011 render_kwargs=render_kwargs, 3012 ) 3013 3014 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 3015 # in dbt custom materialisations it's up to the user to run the post hooks inside the transaction 3016 if not render_kwargs.get("inside_transaction", True): 3017 super().run_post_statements( 3018 snapshot=snapshot, 3019 render_kwargs=render_kwargs, 3020 ) 3021 3022 def _execute_materialization( 3023 self, 3024 table_name: str, 3025 query_or_df: QueryOrDF, 3026 model: Model, 3027 is_first_insert: bool, 3028 render_kwargs: t.Dict[str, t.Any], 3029 create_only: bool = False, 3030 **kwargs: t.Any, 3031 ) -> None: 3032 jinja_macros = model.jinja_macros 3033 3034 # For vdes we need to use the table, since we don't know the schema/table at parse time 3035 parts = exp.to_table(table_name, dialect=self.adapter.dialect) 3036 3037 existing_globals = jinja_macros.global_objs 3038 relation_info = existing_globals.get("this") 3039 if isinstance(relation_info, dict): 3040 relation_info["database"] = parts.catalog 3041 relation_info["identifier"] = parts.name 3042 relation_info["name"] = parts.name 3043 3044 jinja_globals = { 3045 **existing_globals, 3046 "this": relation_info, 3047 "database": parts.catalog, 3048 "schema": parts.db, 3049 "identifier": parts.name, 3050 "target": existing_globals.get("target", {"type": self.adapter.dialect}), 3051 "execution_dt": kwargs.get("execution_time"), 3052 "engine_adapter": self.adapter, 3053 "sql": str(query_or_df), 3054 "is_first_insert": is_first_insert, 3055 "create_only": create_only, 3056 "pre_hooks": [ 3057 AttributeDict({"sql": s.this.this, "transaction": transaction}) 3058 for s in model.pre_statements 3059 if (transaction := s.args.get("transaction", True)) 3060 ], 3061 "post_hooks": [ 3062 AttributeDict({"sql": s.this.this, "transaction": transaction}) 3063 for s in model.post_statements 3064 if (transaction := s.args.get("transaction", True)) 3065 ], 3066 "model_instance": model, 3067 **kwargs, 3068 } 3069 3070 try: 3071 jinja_env = jinja_macros.build_environment(**jinja_globals) 3072 template = jinja_env.from_string(self.materialization_template) 3073 3074 try: 3075 template.render() 3076 except MacroReturnVal as ret: 3077 # this is a successful return from a macro call (dbt uses this list of Relations to update their relation cache) 3078 returned_relations = ret.value.get("relations", []) 3079 logger.info( 3080 f"Materialization {self.materialization_name} returned relations: {returned_relations}" 3081 ) 3082 3083 except Exception as e: 3084 raise SQLMeshError( 3085 f"Failed to execute dbt materialization '{self.materialization_name}': {e}" 3086 ) from e 3087 3088 3089class EngineManagedStrategy(MaterializableStrategy): 3090 def create( 3091 self, 3092 table_name: str, 3093 model: Model, 3094 is_table_deployable: bool, 3095 render_kwargs: t.Dict[str, t.Any], 3096 skip_grants: bool, 3097 **kwargs: t.Any, 3098 ) -> None: 3099 is_snapshot_deployable: bool = kwargs["is_snapshot_deployable"] 3100 3101 if is_table_deployable and is_snapshot_deployable: 3102 # We could deploy this to prod; create a proper managed table 3103 logger.info("Creating managed table: %s", table_name) 3104 self.adapter.create_managed_table( 3105 table_name=table_name, 3106 query=model.render_query_or_raise(**render_kwargs), 3107 target_columns_to_types=model.columns_to_types, 3108 partitioned_by=model.partitioned_by, 3109 clustered_by=model.clustered_by, # type: ignore[arg-type] 3110 table_properties=kwargs.get("physical_properties", model.physical_properties), 3111 table_description=model.description, 3112 column_descriptions=model.column_descriptions, 3113 table_format=model.table_format, 3114 ) 3115 3116 # Apply grants after managed table creation 3117 if not skip_grants: 3118 self._apply_grants( 3119 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3120 ) 3121 3122 elif not is_table_deployable: 3123 # Only create the dev preview table as a normal table. 3124 # For the main table, if the snapshot is cant be deployed to prod (eg upstream is forward-only) do nothing. 3125 # Any downstream models that reference it will be updated to point to the dev preview table. 3126 # If the user eventually tries to deploy it, the logic in insert() will see it doesnt exist and create it 3127 super().create( 3128 table_name=table_name, 3129 model=model, 3130 is_table_deployable=is_table_deployable, 3131 render_kwargs=render_kwargs, 3132 skip_grants=skip_grants, 3133 **kwargs, 3134 ) 3135 3136 def insert( 3137 self, 3138 table_name: str, 3139 query_or_df: QueryOrDF, 3140 model: Model, 3141 is_first_insert: bool, 3142 render_kwargs: t.Dict[str, t.Any], 3143 **kwargs: t.Any, 3144 ) -> None: 3145 deployability_index: DeployabilityIndex = kwargs["deployability_index"] 3146 snapshot: Snapshot = kwargs["snapshot"] 3147 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 3148 if is_first_insert and is_snapshot_deployable and not self.adapter.table_exists(table_name): 3149 self.adapter.create_managed_table( 3150 table_name=table_name, 3151 query=query_or_df, # type: ignore 3152 target_columns_to_types=model.columns_to_types, 3153 partitioned_by=model.partitioned_by, 3154 clustered_by=model.clustered_by, # type: ignore[arg-type] 3155 table_properties=kwargs.get("physical_properties", model.physical_properties), 3156 table_description=model.description, 3157 column_descriptions=model.column_descriptions, 3158 table_format=model.table_format, 3159 ) 3160 self._apply_grants( 3161 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3162 ) 3163 elif not is_snapshot_deployable: 3164 # Snapshot isnt deployable; update the preview table instead 3165 # If the snapshot was deployable, then data would have already been loaded in create() because a managed table would have been created 3166 logger.info( 3167 "Updating preview table: %s (for managed model: %s)", 3168 table_name, 3169 model.name, 3170 ) 3171 self._replace_query_for_model( 3172 model=model, 3173 name=table_name, 3174 query_or_df=query_or_df, 3175 render_kwargs=render_kwargs, 3176 **kwargs, 3177 ) 3178 3179 def append( 3180 self, 3181 table_name: str, 3182 query_or_df: QueryOrDF, 3183 model: Model, 3184 render_kwargs: t.Dict[str, t.Any], 3185 **kwargs: t.Any, 3186 ) -> None: 3187 raise ConfigError(f"Cannot append to a managed table '{table_name}'.") 3188 3189 def migrate( 3190 self, 3191 target_table_name: str, 3192 source_table_name: str, 3193 snapshot: Snapshot, 3194 *, 3195 ignore_destructive: bool, 3196 ignore_additive: bool, 3197 **kwargs: t.Any, 3198 ) -> None: 3199 potential_alter_operations = self.adapter.get_alter_operations( 3200 target_table_name, 3201 source_table_name, 3202 ignore_destructive=ignore_destructive, 3203 ignore_additive=ignore_additive, 3204 ) 3205 if len(potential_alter_operations) > 0: 3206 # this can happen if a user changes a managed model and deliberately overrides a plan to be forward only, eg `sqlmesh plan --forward-only` 3207 raise MigrationNotSupportedError( 3208 f"The schema of the managed model '{target_table_name}' cannot be updated in a forward-only fashion." 3209 ) 3210 3211 # Apply grants after verifying no schema changes 3212 deployability_index = kwargs.get("deployability_index") 3213 is_snapshot_deployable = ( 3214 deployability_index.is_deployable(snapshot) if deployability_index else False 3215 ) 3216 self._apply_grants( 3217 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3218 ) 3219 3220 def delete(self, name: str, **kwargs: t.Any) -> None: 3221 # a dev preview table is created as a normal table, so it needs to be dropped as a normal table 3222 _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 3223 if kwargs["is_table_deployable"]: 3224 self.adapter.drop_managed_table(name) 3225 logger.info("Dropped managed table '%s'", name) 3226 else: 3227 self.adapter.drop_table(name) 3228 logger.info("Dropped dev preview for managed table '%s'", name) 3229 3230 3231def _intervals(snapshot: Snapshot, deployability_index: DeployabilityIndex) -> Intervals: 3232 return ( 3233 snapshot.intervals 3234 if deployability_index.is_deployable(snapshot) 3235 else snapshot.dev_intervals 3236 ) 3237 3238 3239def _check_destructive_schema_change( 3240 snapshot: Snapshot, 3241 alter_operations: t.List[TableAlterOperation], 3242 allow_destructive_snapshots: t.Set[str], 3243) -> None: 3244 if ( 3245 snapshot.is_no_rebuild 3246 and snapshot.needs_destructive_check(allow_destructive_snapshots) 3247 and has_drop_alteration(alter_operations) 3248 ): 3249 snapshot_name = snapshot.name 3250 model_dialect = snapshot.model.dialect 3251 3252 if snapshot.model.on_destructive_change.is_warn: 3253 logger.warning( 3254 format_destructive_change_msg( 3255 snapshot_name, 3256 alter_operations, 3257 model_dialect, 3258 error=False, 3259 ) 3260 ) 3261 return 3262 raise DestructiveChangeError( 3263 format_destructive_change_msg(snapshot_name, alter_operations, model_dialect) 3264 ) 3265 3266 3267def _check_additive_schema_change( 3268 snapshot: Snapshot, 3269 alter_operations: t.List[TableAlterOperation], 3270 allow_additive_snapshots: t.Set[str], 3271) -> None: 3272 # Only check additive changes for incremental models that have the on_additive_change property 3273 if not isinstance(snapshot.model.kind, _Incremental): 3274 return 3275 3276 if snapshot.needs_additive_check(allow_additive_snapshots) and has_additive_alteration( 3277 alter_operations 3278 ): 3279 # Note: IGNORE filtering is applied before this function is called 3280 # so if we reach here, additive changes are not being ignored 3281 snapshot_name = snapshot.name 3282 model_dialect = snapshot.model.dialect 3283 3284 if snapshot.model.on_additive_change.is_warn: 3285 logger.warning( 3286 format_additive_change_msg( 3287 snapshot_name, 3288 alter_operations, 3289 model_dialect, 3290 error=False, 3291 ) 3292 ) 3293 return 3294 if snapshot.model.on_additive_change.is_error: 3295 raise AdditiveChangeError( 3296 format_additive_change_msg(snapshot_name, alter_operations, model_dialect) 3297 ) 3298 3299 3300def _check_table_db_is_physical_schema(table_name: str, physical_schema: str) -> None: 3301 table = exp.to_table(table_name) 3302 if table.db != physical_schema: 3303 raise SQLMeshError( 3304 f"Table '{table_name}' is not a part of the physical schema '{physical_schema}' and so can't be dropped." 3305 ) 3306 3307 3308def _snapshot_to_data_object_type(snapshot: Snapshot) -> DataObjectType: 3309 if snapshot.is_managed: 3310 return DataObjectType.MANAGED_TABLE 3311 if snapshot.is_materialized_view: 3312 return DataObjectType.MATERIALIZED_VIEW 3313 if snapshot.is_view: 3314 return DataObjectType.VIEW 3315 if snapshot.is_materialized: 3316 return DataObjectType.TABLE 3317 return DataObjectType.UNKNOWN
103class SnapshotCreationFailedError(SQLMeshError): 104 def __init__( 105 self, errors: t.List[NodeExecutionFailedError[SnapshotId]], skipped: t.List[SnapshotId] 106 ): 107 messages = "\n\n".join(f"{error}\n {error.__cause__}" for error in errors) 108 super().__init__(f"Physical table creation failed:\n\n{messages}") 109 self.errors = errors 110 self.skipped = skipped
Common base class for all non-exit exceptions.
104 def __init__( 105 self, errors: t.List[NodeExecutionFailedError[SnapshotId]], skipped: t.List[SnapshotId] 106 ): 107 messages = "\n\n".join(f"{error}\n {error.__cause__}" for error in errors) 108 super().__init__(f"Physical table creation failed:\n\n{messages}") 109 self.errors = errors 110 self.skipped = skipped
Inherited Members
- builtins.BaseException
- with_traceback
- args
113class SnapshotEvaluator: 114 """Evaluates a snapshot given runtime arguments through an arbitrary EngineAdapter. 115 116 The SnapshotEvaluator contains the business logic to generically evaluate a snapshot. 117 It is responsible for delegating queries to the EngineAdapter. The SnapshotEvaluator 118 does not directly communicate with the underlying execution engine. 119 120 Args: 121 adapters: A single EngineAdapter or a dictionary of EngineAdapters where 122 the key is the gateway name. When a dictionary is provided, and not an 123 explicit default gateway its first item is treated as the default 124 adapter and used for the virtual layer. 125 ddl_concurrent_tasks: The number of concurrent tasks used for DDL 126 operations (table / view creation, deletion, etc). Default: 1. 127 """ 128 129 def __init__( 130 self, 131 adapters: EngineAdapter | t.Dict[str, EngineAdapter], 132 ddl_concurrent_tasks: int = 1, 133 selected_gateway: t.Optional[str] = None, 134 ): 135 self.adapters = ( 136 adapters if isinstance(adapters, t.Dict) else {selected_gateway or "": adapters} 137 ) 138 self.execution_tracker = QueryExecutionTracker() 139 self.adapters = { 140 gateway: adapter.with_settings(query_execution_tracker=self.execution_tracker) 141 for gateway, adapter in self.adapters.items() 142 } 143 self.adapter = ( 144 next(iter(self.adapters.values())) 145 if not selected_gateway 146 else self.adapters[selected_gateway] 147 ) 148 self.selected_gateway = selected_gateway 149 self.ddl_concurrent_tasks = ddl_concurrent_tasks 150 151 def evaluate( 152 self, 153 snapshot: Snapshot, 154 *, 155 start: TimeLike, 156 end: TimeLike, 157 execution_time: TimeLike, 158 snapshots: t.Dict[str, Snapshot], 159 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 160 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 161 deployability_index: t.Optional[DeployabilityIndex] = None, 162 batch_index: int = 0, 163 target_table_exists: t.Optional[bool] = None, 164 **kwargs: t.Any, 165 ) -> t.Optional[str]: 166 """Renders the snapshot's model, executes it and stores the result in the snapshot's physical table. 167 168 Args: 169 snapshot: Snapshot to evaluate. 170 start: The start datetime to render. 171 end: The end datetime to render. 172 execution_time: The date/time time reference to use for execution time. 173 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 174 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 175 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 176 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 177 batch_index: If the snapshot is part of a batch of related snapshots; which index in the batch is it 178 target_table_exists: Whether the target table exists. If None, the table will be checked for existence. 179 kwargs: Additional kwargs to pass to the renderer. 180 181 Returns: 182 The WAP ID of this evaluation if supported, None otherwise. 183 """ 184 with self.execution_tracker.track_execution( 185 SnapshotIdBatch(snapshot_id=snapshot.snapshot_id, batch_id=batch_index) 186 ): 187 result = self._evaluate_snapshot( 188 start=start, 189 end=end, 190 execution_time=execution_time, 191 snapshot=snapshot, 192 snapshots=snapshots, 193 allow_destructive_snapshots=allow_destructive_snapshots or set(), 194 allow_additive_snapshots=allow_additive_snapshots or set(), 195 deployability_index=deployability_index, 196 batch_index=batch_index, 197 target_table_exists=target_table_exists, 198 **kwargs, 199 ) 200 if result is None or isinstance(result, str): 201 return result 202 raise SQLMeshError( 203 f"Unexpected result {result} when evaluating snapshot {snapshot.snapshot_id}." 204 ) 205 206 def evaluate_and_fetch( 207 self, 208 snapshot: Snapshot, 209 *, 210 start: TimeLike, 211 end: TimeLike, 212 execution_time: TimeLike, 213 snapshots: t.Dict[str, Snapshot], 214 limit: int, 215 deployability_index: t.Optional[DeployabilityIndex] = None, 216 **kwargs: t.Any, 217 ) -> DF: 218 """Renders the snapshot's model, executes it and returns a dataframe with the result. 219 220 Args: 221 snapshot: Snapshot to evaluate. 222 start: The start datetime to render. 223 end: The end datetime to render. 224 execution_time: The date/time time reference to use for execution time. 225 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 226 limit: The maximum number of rows to fetch. 227 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 228 kwargs: Additional kwargs to pass to the renderer. 229 230 Returns: 231 The result of the evaluation as a dataframe. 232 """ 233 import pandas as pd 234 235 adapter = self.get_adapter(snapshot.model.gateway) 236 render_kwargs = dict( 237 start=start, 238 end=end, 239 execution_time=execution_time, 240 snapshot=snapshot, 241 runtime_stage=RuntimeStage.EVALUATING, 242 **kwargs, 243 ) 244 queries_or_dfs = self._render_snapshot_for_evaluation( 245 snapshot, 246 snapshots, 247 deployability_index or DeployabilityIndex.all_deployable(), 248 render_kwargs, 249 ) 250 query_or_df = next(queries_or_dfs) 251 if isinstance(query_or_df, pd.DataFrame): 252 return query_or_df.head(limit) 253 if not isinstance(query_or_df, exp.Expr): 254 # We assume that if this branch is reached, `query_or_df` is a pyspark / snowpark / bigframe dataframe, 255 # so we use `limit` instead of `head` to get back a dataframe instead of List[Row] 256 # https://spark.apache.org/docs/3.1.1/api/python/reference/api/pyspark.sql.DataFrame.head.html#pyspark.sql.DataFrame.head 257 return query_or_df.limit(limit) 258 259 assert isinstance(query_or_df, exp.Query) 260 261 existing_limit = query_or_df.args.get("limit") 262 if existing_limit: 263 limit = min(limit, execute(exp.select(existing_limit.expression)).rows[0][0]) 264 assert limit is not None 265 266 return adapter._fetch_native_df(query_or_df.limit(limit)) 267 268 def promote( 269 self, 270 target_snapshots: t.Iterable[Snapshot], 271 environment_naming_info: EnvironmentNamingInfo, 272 deployability_index: t.Optional[DeployabilityIndex] = None, 273 start: t.Optional[TimeLike] = None, 274 end: t.Optional[TimeLike] = None, 275 execution_time: t.Optional[TimeLike] = None, 276 snapshots: t.Optional[t.Dict[SnapshotId, Snapshot]] = None, 277 table_mapping: t.Optional[t.Dict[str, str]] = None, 278 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 279 ) -> None: 280 """Promotes the given collection of snapshots in the target environment by replacing a corresponding 281 view with a physical table associated with the given snapshot. 282 283 Args: 284 target_snapshots: Snapshots to promote. 285 environment_naming_info: Naming information for the target environment. 286 deployability_index: Determines snapshots that are deployable in the context of this promotion. 287 on_complete: A callback to call on each successfully promoted snapshot. 288 """ 289 290 tables_by_gateway: t.Dict[t.Union[str, None], t.List[exp.Table]] = defaultdict(list) 291 for snapshot in target_snapshots: 292 if snapshot.is_model and not snapshot.is_symbolic: 293 gateway = ( 294 snapshot.model_gateway if environment_naming_info.gateway_managed else None 295 ) 296 adapter = self.get_adapter(gateway) 297 table = snapshot.qualified_view_name.table_for_environment( 298 environment_naming_info, dialect=adapter.dialect 299 ) 300 tables_by_gateway[gateway].append(table) 301 302 # A schema can be shared across multiple engines, so we need to group by gateway 303 for gateway, tables in tables_by_gateway.items(): 304 if environment_naming_info.suffix_target.is_catalog: 305 self._create_catalogs(tables=tables, gateway=gateway) 306 307 gateway_table_pairs = [ 308 (gateway, table) for gateway, tables in tables_by_gateway.items() for table in tables 309 ] 310 self._create_schemas(gateway_table_pairs=gateway_table_pairs) 311 312 # Fetch the view data objects for the promoted snapshots to get them cached 313 self._get_virtual_data_objects(target_snapshots, environment_naming_info) 314 315 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 316 with self.concurrent_context(): 317 concurrent_apply_to_snapshots( 318 target_snapshots, 319 lambda s: self._promote_snapshot( 320 s, 321 start=start, 322 end=end, 323 execution_time=execution_time, 324 snapshots=snapshots, 325 table_mapping=table_mapping, 326 environment_naming_info=environment_naming_info, 327 deployability_index=deployability_index, # type: ignore 328 on_complete=on_complete, 329 ), 330 self.ddl_concurrent_tasks, 331 ) 332 333 def demote( 334 self, 335 target_snapshots: t.Iterable[Snapshot], 336 environment_naming_info: EnvironmentNamingInfo, 337 table_mapping: t.Optional[t.Dict[str, str]] = None, 338 deployability_index: t.Optional[DeployabilityIndex] = None, 339 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 340 ) -> None: 341 """Demotes the given collection of snapshots in the target environment by removing its view. 342 343 Args: 344 target_snapshots: Snapshots to demote. 345 environment_naming_info: Naming info for the target environment. 346 on_complete: A callback to call on each successfully demoted snapshot. 347 """ 348 with self.concurrent_context(): 349 concurrent_apply_to_snapshots( 350 target_snapshots, 351 lambda s: self._demote_snapshot( 352 s, 353 environment_naming_info, 354 deployability_index=deployability_index, 355 on_complete=on_complete, 356 table_mapping=table_mapping, 357 ), 358 self.ddl_concurrent_tasks, 359 ) 360 361 def create( 362 self, 363 target_snapshots: t.Iterable[Snapshot], 364 snapshots: t.Dict[SnapshotId, Snapshot], 365 deployability_index: t.Optional[DeployabilityIndex] = None, 366 on_start: t.Optional[t.Callable] = None, 367 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 368 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 369 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 370 ) -> CompletionStatus: 371 """Creates a physical snapshot schema and table for the given collection of snapshots. 372 373 Args: 374 target_snapshots: Target snapshots. 375 snapshots: Mapping of snapshot ID to snapshot. 376 deployability_index: Determines snapshots that are deployable in the context of this creation. 377 on_start: A callback to initialize the snapshot creation progress bar. 378 on_complete: A callback to call on each successfully created snapshot. 379 allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes. 380 allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes. 381 382 Returns: 383 CompletionStatus: The status of the creation operation (success, failure, nothing to do). 384 """ 385 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 386 387 snapshots_to_create = self.get_snapshots_to_create(target_snapshots, deployability_index) 388 if not snapshots_to_create: 389 return CompletionStatus.NOTHING_TO_DO 390 if on_start: 391 on_start(snapshots_to_create) 392 393 self._create_snapshots( 394 snapshots_to_create=snapshots_to_create, 395 snapshots={s.name: s for s in snapshots.values()}, 396 deployability_index=deployability_index, 397 on_complete=on_complete, 398 allow_destructive_snapshots=allow_destructive_snapshots or set(), 399 allow_additive_snapshots=allow_additive_snapshots or set(), 400 ) 401 return CompletionStatus.SUCCESS 402 403 def create_physical_schemas( 404 self, snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex 405 ) -> None: 406 """Creates the physical schemas for the given snapshots. 407 408 Args: 409 snapshots: Snapshots to create physical schemas for. 410 deployability_index: Determines snapshots that are deployable in the context of this creation. 411 """ 412 tables_by_gateway: t.Dict[t.Optional[str], t.List[str]] = defaultdict(list) 413 for snapshot in snapshots: 414 if snapshot.is_model and not snapshot.is_symbolic: 415 tables_by_gateway[snapshot.model_gateway].append( 416 snapshot.table_name(is_deployable=deployability_index.is_deployable(snapshot)) 417 ) 418 419 gateway_table_pairs = [ 420 (gateway, table) for gateway, tables in tables_by_gateway.items() for table in tables 421 ] 422 self._create_schemas(gateway_table_pairs=gateway_table_pairs) 423 424 def get_snapshots_to_create( 425 self, target_snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex 426 ) -> t.List[Snapshot]: 427 """Returns a list of snapshots that need to have their physical tables created. 428 429 Args: 430 target_snapshots: Target snapshots. 431 deployability_index: Determines snapshots that are deployable / representative in the context of this creation. 432 """ 433 existing_data_objects = self._get_physical_data_objects( 434 target_snapshots, deployability_index 435 ) 436 snapshots_to_create = [] 437 for snapshot in target_snapshots: 438 if not snapshot.is_model or snapshot.is_symbolic: 439 continue 440 if snapshot.snapshot_id not in existing_data_objects or ( 441 snapshot.is_seed and not snapshot.intervals 442 ): 443 snapshots_to_create.append(snapshot) 444 445 return snapshots_to_create 446 447 def _create_snapshots( 448 self, 449 snapshots_to_create: t.Iterable[Snapshot], 450 snapshots: t.Dict[str, Snapshot], 451 deployability_index: DeployabilityIndex, 452 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 453 allow_destructive_snapshots: t.Set[str], 454 allow_additive_snapshots: t.Set[str], 455 ) -> None: 456 """Internal method to create tables in parallel.""" 457 with self.concurrent_context(): 458 errors, skipped = concurrent_apply_to_snapshots( 459 snapshots_to_create, 460 lambda s: self.create_snapshot( 461 s, 462 snapshots=snapshots, 463 deployability_index=deployability_index, 464 allow_destructive_snapshots=allow_destructive_snapshots, 465 allow_additive_snapshots=allow_additive_snapshots, 466 on_complete=on_complete, 467 ), 468 self.ddl_concurrent_tasks, 469 raise_on_error=False, 470 ) 471 if errors: 472 raise SnapshotCreationFailedError(errors, skipped) 473 474 def migrate( 475 self, 476 target_snapshots: t.Iterable[Snapshot], 477 snapshots: t.Dict[SnapshotId, Snapshot], 478 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 479 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 480 deployability_index: t.Optional[DeployabilityIndex] = None, 481 ) -> None: 482 """Alters a physical snapshot table to match its snapshot's schema for the given collection of snapshots. 483 484 Args: 485 target_snapshots: Target snapshots. 486 snapshots: Mapping of snapshot ID to snapshot. 487 allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes. 488 allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes. 489 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 490 """ 491 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 492 target_data_objects = self._get_physical_data_objects(target_snapshots, deployability_index) 493 if not target_data_objects: 494 return 495 496 if not snapshots: 497 snapshots = {s.snapshot_id: s for s in target_snapshots} 498 499 allow_destructive_snapshots = allow_destructive_snapshots or set() 500 allow_additive_snapshots = allow_additive_snapshots or set() 501 snapshots_by_name = {s.name: s for s in snapshots.values()} 502 with self.concurrent_context(): 503 # Only migrate snapshots for which there's an existing data object 504 concurrent_apply_to_snapshots( 505 target_snapshots, 506 lambda s: self._migrate_snapshot( 507 s, 508 snapshots_by_name, 509 target_data_objects.get(s.snapshot_id), 510 allow_destructive_snapshots, 511 allow_additive_snapshots, 512 self.get_adapter(s.model_gateway), 513 deployability_index, 514 ), 515 self.ddl_concurrent_tasks, 516 ) 517 518 def cleanup( 519 self, 520 target_snapshots: t.Iterable[SnapshotTableCleanupTask], 521 on_complete: t.Optional[t.Callable[[str], None]] = None, 522 ) -> None: 523 """Cleans up the given snapshots by removing its table 524 525 Args: 526 target_snapshots: Snapshots to cleanup. 527 on_complete: A callback to call on each successfully deleted database object. 528 """ 529 target_snapshots = [ 530 t for t in target_snapshots if t.snapshot.is_model and not t.snapshot.is_symbolic 531 ] 532 snapshots_to_dev_table_only = { 533 t.snapshot.snapshot_id: t.dev_table_only for t in target_snapshots 534 } 535 with self.concurrent_context(): 536 concurrent_apply_to_snapshots( 537 [t.snapshot for t in target_snapshots], 538 lambda s: self._cleanup_snapshot( 539 s, 540 snapshots_to_dev_table_only[s.snapshot_id], 541 self.get_adapter(s.model_gateway), 542 on_complete, 543 ), 544 self.ddl_concurrent_tasks, 545 reverse_order=True, 546 ) 547 548 def audit( 549 self, 550 snapshot: Snapshot, 551 *, 552 snapshots: t.Dict[str, Snapshot], 553 start: t.Optional[TimeLike] = None, 554 end: t.Optional[TimeLike] = None, 555 execution_time: t.Optional[TimeLike] = None, 556 deployability_index: t.Optional[DeployabilityIndex] = None, 557 wap_id: t.Optional[str] = None, 558 **kwargs: t.Any, 559 ) -> t.List[AuditResult]: 560 """Execute a snapshot's node's audit queries. 561 562 Args: 563 snapshot: Snapshot to evaluate. 564 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 565 start: The start datetime to audit. Defaults to epoch start. 566 end: The end datetime to audit. Defaults to epoch start. 567 execution_time: The date/time time reference to use for execution time. 568 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 569 wap_id: The WAP ID if applicable, None otherwise. 570 kwargs: Additional kwargs to pass to the renderer. 571 """ 572 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 573 adapter = self.get_adapter(snapshot.model_gateway) 574 575 if not snapshot.version: 576 raise ConfigError( 577 f"Cannot audit '{snapshot.name}' because it has not been versioned yet. Apply a plan first." 578 ) 579 580 if wap_id is not None: 581 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 582 original_table_name = snapshot.table_name( 583 is_deployable=deployability_index.is_deployable(snapshot) 584 ) 585 wap_table_name = adapter.wap_table_name(original_table_name, wap_id) 586 logger.info( 587 "Auditing WAP table '%s', snapshot %s", 588 wap_table_name, 589 snapshot.snapshot_id, 590 ) 591 592 table_mapping = kwargs.get("table_mapping") or {} 593 table_mapping[snapshot.name] = wap_table_name 594 kwargs["table_mapping"] = table_mapping 595 kwargs["this_model"] = exp.to_table(wap_table_name, dialect=adapter.dialect) 596 597 results = [] 598 599 audits_with_args = snapshot.node.audits_with_args 600 601 force_non_blocking = False 602 603 if audits_with_args: 604 logger.info("Auditing snapshot %s", snapshot.snapshot_id) 605 606 if not deployability_index.is_deployable(snapshot) and not adapter.SUPPORTS_CLONING: 607 # For dev preview tables that aren't based on clones of the production table, only a subset of the data is typically available 608 # However, users still expect audits to run anwyay. Some audits (such as row count) are practically guaranteed to fail 609 # when run on only a subset of data, so we switch all audits to non blocking and the user can decide if they still want to proceed 610 force_non_blocking = True 611 612 for audit, audit_args in audits_with_args: 613 if force_non_blocking: 614 # remove any blocking indicator on the model itself 615 audit_args.pop("blocking", None) 616 # so that we can fall back to the audit's setting, which we override to blocking: False 617 audit = audit.model_copy(update={"blocking": False}) 618 619 results.append( 620 self._audit( 621 audit=audit, 622 audit_args=audit_args, 623 snapshot=snapshot, 624 snapshots=snapshots, 625 start=start, 626 end=end, 627 execution_time=execution_time, 628 deployability_index=deployability_index, 629 **kwargs, 630 ) 631 ) 632 633 if wap_id is not None: 634 logger.info( 635 "Publishing evaluation results for snapshot %s, WAP ID '%s'", 636 snapshot.snapshot_id, 637 wap_id, 638 ) 639 self.wap_publish_snapshot(snapshot, wap_id, deployability_index) 640 641 return results 642 643 @contextmanager 644 def concurrent_context(self) -> t.Iterator[None]: 645 try: 646 yield 647 finally: 648 self.recycle() 649 650 def recycle(self) -> None: 651 """Closes all open connections and releases all allocated resources associated with any thread 652 except the calling one.""" 653 try: 654 for adapter in self.adapters.values(): 655 adapter.recycle() 656 657 except Exception: 658 logger.exception("Failed to recycle Snapshot Evaluator") 659 660 def close(self) -> None: 661 """Closes all open connections and releases all allocated resources.""" 662 try: 663 for adapter in self.adapters.values(): 664 adapter.close() 665 except Exception: 666 logger.exception("Failed to close Snapshot Evaluator") 667 668 def set_correlation_id(self, correlation_id: CorrelationId) -> SnapshotEvaluator: 669 return SnapshotEvaluator( 670 { 671 gateway: adapter.with_settings(correlation_id=correlation_id) 672 for gateway, adapter in self.adapters.items() 673 }, 674 self.ddl_concurrent_tasks, 675 self.selected_gateway, 676 ) 677 678 def _evaluate_snapshot( 679 self, 680 start: TimeLike, 681 end: TimeLike, 682 execution_time: TimeLike, 683 snapshot: Snapshot, 684 snapshots: t.Dict[str, Snapshot], 685 allow_destructive_snapshots: t.Set[str], 686 allow_additive_snapshots: t.Set[str], 687 deployability_index: t.Optional[DeployabilityIndex], 688 batch_index: int, 689 target_table_exists: t.Optional[bool], 690 **kwargs: t.Any, 691 ) -> t.Optional[str]: 692 """Renders the snapshot's model and executes it. The return value depends on whether the limit was specified. 693 694 Args: 695 snapshot: Snapshot to evaluate. 696 start: The start datetime to render. 697 end: The end datetime to render. 698 execution_time: The date/time time reference to use for execution time. 699 snapshots: All upstream snapshots to use for expansion and mapping of physical locations. 700 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 701 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 702 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 703 batch_index: If the snapshot is part of a batch of related snapshots; which index in the batch is it 704 target_table_exists: Whether the target table exists. If None, the table will be checked for existence. 705 kwargs: Additional kwargs to pass to the renderer. 706 """ 707 if not snapshot.is_model: 708 return None 709 710 model = snapshot.model 711 712 logger.info("Evaluating snapshot %s", snapshot.snapshot_id) 713 714 adapter = self.get_adapter(model.gateway) 715 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 716 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 717 target_table_name = snapshot.table_name(is_deployable=is_snapshot_deployable) 718 # https://github.com/SQLMesh/sqlmesh/issues/2609 719 # If there are no existing intervals yet; only consider this a first insert for the first snapshot in the batch 720 if target_table_exists is None: 721 target_table_exists = adapter.table_exists(target_table_name) 722 is_first_insert = ( 723 not _intervals(snapshot, deployability_index) or not target_table_exists 724 ) and batch_index == 0 725 726 # Use the 'creating' stage if the table doesn't exist yet to preserve backwards compatibility with existing projects 727 # that depend on a separate physical table creation stage. 728 runtime_stage = RuntimeStage.EVALUATING if target_table_exists else RuntimeStage.CREATING 729 common_render_kwargs = dict( 730 start=start, 731 end=end, 732 execution_time=execution_time, 733 snapshot=snapshot, 734 runtime_stage=runtime_stage, 735 **kwargs, 736 ) 737 create_render_kwargs = dict( 738 engine_adapter=adapter, 739 snapshots=snapshots, 740 deployability_index=deployability_index, 741 **common_render_kwargs, 742 ) 743 create_render_kwargs["runtime_stage"] = RuntimeStage.CREATING 744 render_statements_kwargs = dict( 745 engine_adapter=adapter, 746 snapshots=snapshots, 747 deployability_index=deployability_index, 748 **common_render_kwargs, 749 ) 750 rendered_physical_properties = snapshot.model.render_physical_properties( 751 **render_statements_kwargs 752 ) 753 754 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 755 evaluation_strategy.run_pre_statements( 756 snapshot=snapshot, 757 render_kwargs={**render_statements_kwargs, "inside_transaction": False}, 758 ) 759 760 with ( 761 adapter.transaction(), 762 adapter.session(snapshot.model.render_session_properties(**render_statements_kwargs)), 763 ): 764 evaluation_strategy.run_pre_statements( 765 snapshot=snapshot, 766 render_kwargs={**render_statements_kwargs, "inside_transaction": True}, 767 ) 768 769 if not target_table_exists or (model.is_seed and not snapshot.intervals): 770 # Only create the empty table if the columns were provided explicitly by the user 771 should_create_empty_table = ( 772 model.kind.is_materialized 773 and model.columns_to_types_ 774 and columns_to_types_all_known(model.columns_to_types_) 775 ) 776 if not should_create_empty_table: 777 # Or if the model is self-referential and its query is fully annotated with types 778 should_create_empty_table = model.depends_on_self and model.annotated 779 if self._can_clone(snapshot, deployability_index): 780 self._clone_snapshot_in_dev( 781 snapshot=snapshot, 782 snapshots=snapshots, 783 deployability_index=deployability_index, 784 render_kwargs=create_render_kwargs, 785 rendered_physical_properties=rendered_physical_properties.copy(), 786 allow_destructive_snapshots=allow_destructive_snapshots, 787 allow_additive_snapshots=allow_additive_snapshots, 788 ) 789 runtime_stage = RuntimeStage.EVALUATING 790 target_table_exists = True 791 elif should_create_empty_table or model.is_seed or model.kind.is_scd_type_2: 792 self._execute_create( 793 snapshot=snapshot, 794 table_name=target_table_name, 795 is_table_deployable=is_snapshot_deployable, 796 deployability_index=deployability_index, 797 create_render_kwargs=create_render_kwargs, 798 rendered_physical_properties=rendered_physical_properties.copy(), 799 dry_run=False, 800 run_pre_post_statements=False, 801 ) 802 runtime_stage = RuntimeStage.EVALUATING 803 target_table_exists = True 804 805 evaluate_render_kwargs = { 806 **common_render_kwargs, 807 "runtime_stage": runtime_stage, 808 "snapshot_table_exists": target_table_exists, 809 } 810 811 wap_id: t.Optional[str] = None 812 if ( 813 snapshot.is_materialized 814 and target_table_exists 815 and adapter.wap_enabled 816 and (model.wap_supported or adapter.wap_supported(target_table_name)) 817 ): 818 wap_id = random_id()[0:8] 819 logger.info("Using WAP ID '%s' for snapshot %s", wap_id, snapshot.snapshot_id) 820 target_table_name = adapter.wap_prepare(target_table_name, wap_id) 821 822 self._render_and_insert_snapshot( 823 start=start, 824 end=end, 825 execution_time=execution_time, 826 snapshot=snapshot, 827 snapshots=snapshots, 828 render_kwargs=evaluate_render_kwargs, 829 create_render_kwargs=create_render_kwargs, 830 rendered_physical_properties=rendered_physical_properties, 831 deployability_index=deployability_index, 832 target_table_name=target_table_name, 833 is_first_insert=is_first_insert, 834 batch_index=batch_index, 835 ) 836 837 evaluation_strategy.run_post_statements( 838 snapshot=snapshot, 839 render_kwargs={**render_statements_kwargs, "inside_transaction": True}, 840 ) 841 842 evaluation_strategy.run_post_statements( 843 snapshot=snapshot, 844 render_kwargs={**render_statements_kwargs, "inside_transaction": False}, 845 ) 846 847 return wap_id 848 849 def create_snapshot( 850 self, 851 snapshot: Snapshot, 852 snapshots: t.Dict[str, Snapshot], 853 deployability_index: DeployabilityIndex, 854 allow_destructive_snapshots: t.Set[str], 855 allow_additive_snapshots: t.Set[str], 856 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 857 ) -> None: 858 """Creates a physical table for the given snapshot. 859 860 Args: 861 snapshot: Snapshot to create. 862 snapshots: All upstream snapshots to use for expansion and mapping of physical locations. 863 deployability_index: Determines snapshots that are deployable in the context of this creation. 864 on_complete: A callback to call on each successfully created database object. 865 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 866 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 867 """ 868 if not snapshot.is_model: 869 return 870 871 logger.info("Creating a physical table for snapshot %s", snapshot.snapshot_id) 872 873 adapter = self.get_adapter(snapshot.model.gateway) 874 create_render_kwargs: t.Dict[str, t.Any] = dict( 875 engine_adapter=adapter, 876 snapshots=snapshots, 877 runtime_stage=RuntimeStage.CREATING, 878 deployability_index=deployability_index, 879 ) 880 881 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 882 evaluation_strategy.run_pre_statements( 883 snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} 884 ) 885 886 with ( 887 adapter.transaction(), 888 adapter.session(snapshot.model.render_session_properties(**create_render_kwargs)), 889 ): 890 rendered_physical_properties = snapshot.model.render_physical_properties( 891 **create_render_kwargs 892 ) 893 894 if self._can_clone(snapshot, deployability_index): 895 self._clone_snapshot_in_dev( 896 snapshot=snapshot, 897 snapshots=snapshots, 898 deployability_index=deployability_index, 899 render_kwargs=create_render_kwargs, 900 rendered_physical_properties=rendered_physical_properties, 901 allow_destructive_snapshots=allow_destructive_snapshots, 902 allow_additive_snapshots=allow_additive_snapshots, 903 run_pre_post_statements=True, 904 ) 905 else: 906 is_table_deployable = deployability_index.is_deployable(snapshot) 907 self._execute_create( 908 snapshot=snapshot, 909 table_name=snapshot.table_name(is_deployable=is_table_deployable), 910 is_table_deployable=is_table_deployable, 911 deployability_index=deployability_index, 912 create_render_kwargs=create_render_kwargs, 913 rendered_physical_properties=rendered_physical_properties, 914 dry_run=True, 915 ) 916 917 evaluation_strategy.run_post_statements( 918 snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} 919 ) 920 921 if on_complete is not None: 922 on_complete(snapshot) 923 924 def wap_publish_snapshot( 925 self, 926 snapshot: Snapshot, 927 wap_id: str, 928 deployability_index: t.Optional[DeployabilityIndex], 929 ) -> None: 930 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 931 table_name = snapshot.table_name(is_deployable=deployability_index.is_deployable(snapshot)) 932 adapter = self.get_adapter(snapshot.model_gateway) 933 adapter.wap_publish(table_name, wap_id) 934 935 def _render_and_insert_snapshot( 936 self, 937 start: TimeLike, 938 end: TimeLike, 939 execution_time: TimeLike, 940 snapshot: Snapshot, 941 snapshots: t.Dict[str, Snapshot], 942 render_kwargs: t.Dict[str, t.Any], 943 create_render_kwargs: t.Dict[str, t.Any], 944 rendered_physical_properties: t.Dict[str, exp.Expr], 945 deployability_index: DeployabilityIndex, 946 target_table_name: str, 947 is_first_insert: bool, 948 batch_index: int, 949 ) -> None: 950 if not snapshot.is_model or snapshot.is_seed: 951 return 952 953 logger.info("Inserting data for snapshot %s", snapshot.snapshot_id) 954 955 model = snapshot.model 956 adapter = self.get_adapter(model.gateway) 957 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 958 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 959 960 queries_or_dfs = self._render_snapshot_for_evaluation( 961 snapshot, 962 snapshots, 963 deployability_index, 964 render_kwargs, 965 ) 966 967 def apply(query_or_df: QueryOrDF, index: int = 0) -> None: 968 if index > 0: 969 evaluation_strategy.append( 970 table_name=target_table_name, 971 query_or_df=query_or_df, 972 model=snapshot.model, 973 snapshot=snapshot, 974 snapshots=snapshots, 975 deployability_index=deployability_index, 976 batch_index=batch_index, 977 start=start, 978 end=end, 979 execution_time=execution_time, 980 physical_properties=rendered_physical_properties, 981 render_kwargs=create_render_kwargs, 982 is_snapshot_deployable=is_snapshot_deployable, 983 ) 984 else: 985 logger.info( 986 "Inserting batch (%s, %s) into %s'", 987 time_like_to_str(start), 988 time_like_to_str(end), 989 target_table_name, 990 ) 991 evaluation_strategy.insert( 992 table_name=target_table_name, 993 query_or_df=query_or_df, 994 is_first_insert=is_first_insert, 995 model=snapshot.model, 996 snapshot=snapshot, 997 snapshots=snapshots, 998 deployability_index=deployability_index, 999 batch_index=batch_index, 1000 start=start, 1001 end=end, 1002 execution_time=execution_time, 1003 physical_properties=rendered_physical_properties, 1004 render_kwargs=create_render_kwargs, 1005 is_snapshot_deployable=is_snapshot_deployable, 1006 ) 1007 1008 # DataFrames, unlike SQL expressions, can provide partial results by yielding dataframes. As a result, 1009 # if the engine supports INSERT OVERWRITE or REPLACE WHERE and the snapshot is incremental by time range, we risk 1010 # having a partial result since each dataframe write can re-truncate partitions. To avoid this, we 1011 # union all the dataframes together before writing. For pandas this could result in OOM and a potential 1012 # workaround for that would be to serialize pandas to disk and then read it back with Spark. 1013 # Note: We assume that if multiple things are yielded from `queries_or_dfs` that they are dataframes 1014 # and not SQL expressions. 1015 if ( 1016 adapter.INSERT_OVERWRITE_STRATEGY 1017 in ( 1018 InsertOverwriteStrategy.INSERT_OVERWRITE, 1019 InsertOverwriteStrategy.REPLACE_WHERE, 1020 ) 1021 and snapshot.is_incremental_by_time_range 1022 ): 1023 import pandas as pd 1024 1025 try: 1026 first_query_or_df = next(queries_or_dfs) 1027 except StopIteration: 1028 return 1029 1030 query_or_df = reduce( 1031 lambda a, b: ( 1032 pd.concat([a, b], ignore_index=True) # type: ignore 1033 if isinstance(a, pd.DataFrame) 1034 else a.union_all(b) # type: ignore 1035 ), # type: ignore 1036 queries_or_dfs, 1037 first_query_or_df, 1038 ) 1039 apply(query_or_df, index=0) 1040 else: 1041 for index, query_or_df in enumerate(queries_or_dfs): 1042 apply(query_or_df, index) 1043 1044 def _render_snapshot_for_evaluation( 1045 self, 1046 snapshot: Snapshot, 1047 snapshots: t.Dict[str, Snapshot], 1048 deployability_index: DeployabilityIndex, 1049 render_kwargs: t.Dict[str, t.Any], 1050 ) -> t.Iterator[QueryOrDF]: 1051 from sqlmesh.core.context import ExecutionContext 1052 1053 model = snapshot.model 1054 adapter = self.get_adapter(model.gateway) 1055 1056 return model.render( 1057 context=ExecutionContext( 1058 adapter, 1059 snapshots, 1060 deployability_index, 1061 default_dialect=model.dialect, 1062 default_catalog=model.default_catalog, 1063 ), 1064 **render_kwargs, 1065 ) 1066 1067 def _clone_snapshot_in_dev( 1068 self, 1069 snapshot: Snapshot, 1070 snapshots: t.Dict[str, Snapshot], 1071 deployability_index: DeployabilityIndex, 1072 render_kwargs: t.Dict[str, t.Any], 1073 rendered_physical_properties: t.Dict[str, exp.Expr], 1074 allow_destructive_snapshots: t.Set[str], 1075 allow_additive_snapshots: t.Set[str], 1076 run_pre_post_statements: bool = False, 1077 ) -> None: 1078 adapter = self.get_adapter(snapshot.model.gateway) 1079 1080 target_table_name = snapshot.table_name(is_deployable=False) 1081 source_table_name = snapshot.table_name() 1082 1083 try: 1084 logger.info(f"Cloning table '{source_table_name}' into '{target_table_name}'") 1085 adapter.clone_table( 1086 target_table_name, 1087 snapshot.table_name(), 1088 rendered_physical_properties=rendered_physical_properties, 1089 ) 1090 self._migrate_target_table( 1091 target_table_name=target_table_name, 1092 snapshot=snapshot, 1093 snapshots=snapshots, 1094 deployability_index=deployability_index, 1095 render_kwargs=render_kwargs, 1096 rendered_physical_properties=rendered_physical_properties, 1097 allow_destructive_snapshots=allow_destructive_snapshots, 1098 allow_additive_snapshots=allow_additive_snapshots, 1099 run_pre_post_statements=run_pre_post_statements, 1100 ) 1101 1102 except Exception: 1103 adapter.drop_table(target_table_name) 1104 raise 1105 1106 def _migrate_snapshot( 1107 self, 1108 snapshot: Snapshot, 1109 snapshots: t.Dict[str, Snapshot], 1110 target_data_object: t.Optional[DataObject], 1111 allow_destructive_snapshots: t.Set[str], 1112 allow_additive_snapshots: t.Set[str], 1113 adapter: EngineAdapter, 1114 deployability_index: DeployabilityIndex, 1115 ) -> None: 1116 if not snapshot.is_model or snapshot.is_symbolic: 1117 return 1118 1119 deployability_index = DeployabilityIndex.all_deployable() 1120 render_kwargs: t.Dict[str, t.Any] = dict( 1121 engine_adapter=adapter, 1122 snapshots=snapshots, 1123 runtime_stage=RuntimeStage.CREATING, 1124 deployability_index=deployability_index, 1125 ) 1126 target_table_name = snapshot.table_name() 1127 1128 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1129 evaluation_strategy.run_pre_statements( 1130 snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} 1131 ) 1132 1133 with ( 1134 adapter.transaction(), 1135 adapter.session(snapshot.model.render_session_properties(**render_kwargs)), 1136 ): 1137 table_exists = target_data_object is not None 1138 if adapter.drop_data_object_on_type_mismatch( 1139 target_data_object, _snapshot_to_data_object_type(snapshot) 1140 ): 1141 table_exists = False 1142 1143 rendered_physical_properties = snapshot.model.render_physical_properties( 1144 **render_kwargs 1145 ) 1146 1147 if table_exists: 1148 self._migrate_target_table( 1149 target_table_name=target_table_name, 1150 snapshot=snapshot, 1151 snapshots=snapshots, 1152 deployability_index=deployability_index, 1153 render_kwargs=render_kwargs, 1154 rendered_physical_properties=rendered_physical_properties, 1155 allow_destructive_snapshots=allow_destructive_snapshots, 1156 allow_additive_snapshots=allow_additive_snapshots, 1157 run_pre_post_statements=True, 1158 ) 1159 else: 1160 self._execute_create( 1161 snapshot=snapshot, 1162 table_name=snapshot.table_name(is_deployable=True), 1163 is_table_deployable=True, 1164 deployability_index=deployability_index, 1165 create_render_kwargs=render_kwargs, 1166 rendered_physical_properties=rendered_physical_properties, 1167 dry_run=True, 1168 ) 1169 1170 evaluation_strategy.run_post_statements( 1171 snapshot=snapshot, render_kwargs={**render_kwargs, "inside_transaction": False} 1172 ) 1173 1174 # Retry in case when the table is migrated concurrently from another plan application 1175 @retry( 1176 reraise=True, 1177 stop=stop_after_attempt(5), 1178 wait=wait_exponential(min=1, max=16), 1179 retry=retry_if_not_exception_type( 1180 (DestructiveChangeError, AdditiveChangeError, MigrationNotSupportedError) 1181 ), 1182 ) 1183 def _migrate_target_table( 1184 self, 1185 target_table_name: str, 1186 snapshot: Snapshot, 1187 snapshots: t.Dict[str, Snapshot], 1188 deployability_index: DeployabilityIndex, 1189 render_kwargs: t.Dict[str, t.Any], 1190 rendered_physical_properties: t.Dict[str, exp.Expr], 1191 allow_destructive_snapshots: t.Set[str], 1192 allow_additive_snapshots: t.Set[str], 1193 run_pre_post_statements: bool = False, 1194 ) -> None: 1195 adapter = self.get_adapter(snapshot.model.gateway) 1196 1197 tmp_table = exp.to_table(target_table_name) 1198 tmp_table.this.set("this", f"{tmp_table.name}_schema_tmp") 1199 tmp_table_name = tmp_table.sql() 1200 1201 if snapshot.is_materialized: 1202 self._execute_create( 1203 snapshot=snapshot, 1204 table_name=tmp_table_name, 1205 is_table_deployable=False, 1206 deployability_index=deployability_index, 1207 create_render_kwargs=render_kwargs, 1208 rendered_physical_properties=rendered_physical_properties, 1209 dry_run=False, 1210 run_pre_post_statements=run_pre_post_statements, 1211 skip_grants=True, # skip grants for tmp table 1212 ) 1213 try: 1214 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1215 logger.info( 1216 "Migrating table schema from '%s' to '%s'", 1217 tmp_table_name, 1218 target_table_name, 1219 ) 1220 evaluation_strategy.migrate( 1221 target_table_name=target_table_name, 1222 source_table_name=tmp_table_name, 1223 snapshot=snapshot, 1224 snapshots=snapshots, 1225 allow_destructive_snapshots=allow_destructive_snapshots, 1226 allow_additive_snapshots=allow_additive_snapshots, 1227 ignore_destructive=snapshot.model.on_destructive_change.is_ignore, 1228 ignore_additive=snapshot.model.on_additive_change.is_ignore, 1229 deployability_index=deployability_index, 1230 ) 1231 finally: 1232 if snapshot.is_materialized: 1233 adapter.drop_table(tmp_table_name) 1234 1235 def _promote_snapshot( 1236 self, 1237 snapshot: Snapshot, 1238 environment_naming_info: EnvironmentNamingInfo, 1239 deployability_index: DeployabilityIndex, 1240 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 1241 start: t.Optional[TimeLike] = None, 1242 end: t.Optional[TimeLike] = None, 1243 execution_time: t.Optional[TimeLike] = None, 1244 snapshots: t.Optional[t.Dict[SnapshotId, Snapshot]] = None, 1245 table_mapping: t.Optional[t.Dict[str, str]] = None, 1246 ) -> None: 1247 if not snapshot.is_model: 1248 return 1249 1250 adapter = ( 1251 self.get_adapter(snapshot.model_gateway) 1252 if environment_naming_info.gateway_managed 1253 else self.adapter 1254 ) 1255 table_name = snapshot.table_name(deployability_index.is_representative(snapshot)) 1256 view_name = snapshot.qualified_view_name.for_environment( 1257 environment_naming_info, dialect=adapter.dialect 1258 ) 1259 render_kwargs: t.Dict[str, t.Any] = dict( 1260 start=start, 1261 end=end, 1262 execution_time=execution_time, 1263 engine_adapter=adapter, 1264 deployability_index=deployability_index, 1265 table_mapping=table_mapping, 1266 runtime_stage=RuntimeStage.PROMOTING, 1267 ) 1268 1269 with ( 1270 adapter.transaction(), 1271 adapter.session(snapshot.model.render_session_properties(**render_kwargs)), 1272 ): 1273 _evaluation_strategy(snapshot, adapter).promote( 1274 table_name=table_name, 1275 view_name=view_name, 1276 model=snapshot.model, 1277 environment=environment_naming_info.name, 1278 snapshots=snapshots, 1279 snapshot=snapshot, 1280 **render_kwargs, 1281 ) 1282 1283 snapshot_by_name = {s.name: s for s in (snapshots or {}).values()} 1284 render_kwargs["snapshots"] = snapshot_by_name 1285 adapter.execute(snapshot.model.render_on_virtual_update(**render_kwargs)) 1286 1287 if on_complete is not None: 1288 on_complete(snapshot) 1289 1290 def _demote_snapshot( 1291 self, 1292 snapshot: Snapshot, 1293 environment_naming_info: EnvironmentNamingInfo, 1294 deployability_index: t.Optional[DeployabilityIndex], 1295 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]], 1296 table_mapping: t.Optional[t.Dict[str, str]] = None, 1297 ) -> None: 1298 if not snapshot.is_model: 1299 return 1300 1301 adapter = ( 1302 self.get_adapter(snapshot.model_gateway) 1303 if environment_naming_info.gateway_managed 1304 else self.adapter 1305 ) 1306 view_name = snapshot.qualified_view_name.for_environment( 1307 environment_naming_info, dialect=adapter.dialect 1308 ) 1309 with ( 1310 adapter.transaction(), 1311 adapter.session( 1312 snapshot.model.render_session_properties( 1313 engine_adapter=adapter, 1314 deployability_index=deployability_index, 1315 table_mapping=table_mapping, 1316 runtime_stage=RuntimeStage.DEMOTING, 1317 ) 1318 ), 1319 ): 1320 _evaluation_strategy(snapshot, adapter).demote(view_name) 1321 1322 if on_complete is not None: 1323 on_complete(snapshot) 1324 1325 def _cleanup_snapshot( 1326 self, 1327 snapshot: SnapshotInfoLike, 1328 dev_table_only: bool, 1329 adapter: EngineAdapter, 1330 on_complete: t.Optional[t.Callable[[str], None]], 1331 ) -> None: 1332 snapshot = snapshot.table_info 1333 1334 table_names = [(False, snapshot.table_name(is_deployable=False))] 1335 if not dev_table_only: 1336 table_names.append((True, snapshot.table_name(is_deployable=True))) 1337 1338 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1339 for is_table_deployable, table_name in table_names: 1340 try: 1341 evaluation_strategy.delete( 1342 table_name, 1343 is_table_deployable=is_table_deployable, 1344 physical_schema=snapshot.physical_schema, 1345 # we need to set cascade=true or we will get a 'cant drop because other objects depend on it'-style 1346 # error on engines that enforce referential integrity, such as Postgres 1347 # this situation can happen when a snapshot expires but downstream view snapshots that reference it have not yet expired 1348 cascade=True, 1349 ) 1350 except Exception: 1351 # Use `get_data_object` to check if the table exists instead of `table_exists` since the former 1352 # is based on `INFORMATION_SCHEMA` and avoids touching the table directly. 1353 # This is important when the table name is malformed for some reason and running any statement 1354 # that touches the table would result in an error. 1355 if adapter.get_data_object(table_name) is not None: 1356 raise 1357 logger.warning( 1358 "Skipping cleanup of table '%s' because it does not exist", table_name 1359 ) 1360 1361 if on_complete is not None: 1362 on_complete(table_name) 1363 1364 def _audit( 1365 self, 1366 audit: Audit, 1367 audit_args: t.Dict[t.Any, t.Any], 1368 snapshot: Snapshot, 1369 snapshots: t.Dict[str, Snapshot], 1370 start: t.Optional[TimeLike], 1371 end: t.Optional[TimeLike], 1372 execution_time: t.Optional[TimeLike], 1373 deployability_index: t.Optional[DeployabilityIndex], 1374 **kwargs: t.Any, 1375 ) -> AuditResult: 1376 if audit.skip: 1377 return AuditResult( 1378 audit=audit, 1379 audit_args=audit_args, 1380 model=snapshot.model_or_none, 1381 skipped=True, 1382 ) 1383 1384 # Model's "blocking" argument takes precedence over the audit's default setting 1385 blocking = audit_args.pop("blocking", None) 1386 blocking = blocking == exp.true() if blocking else audit.blocking 1387 1388 adapter = self.get_adapter(snapshot.model_gateway) 1389 1390 kwargs = { 1391 "start": start, 1392 "end": end, 1393 "execution_time": execution_time, 1394 "snapshots": snapshots, 1395 "deployability_index": deployability_index, 1396 "engine_adapter": adapter, 1397 "runtime_stage": RuntimeStage.AUDITING, 1398 **audit_args, 1399 **kwargs, 1400 } 1401 1402 if snapshot.is_model: 1403 query = snapshot.model.render_audit_query(audit, **kwargs) 1404 elif isinstance(audit, StandaloneAudit): 1405 query = audit.render_audit_query(**kwargs) 1406 else: 1407 raise SQLMeshError("Expected model or standalone audit. {snapshot}: {audit}") 1408 1409 count, *_ = adapter.fetchone( 1410 select("COUNT(*)").from_(query.subquery("audit")), 1411 quote_identifiers=True, 1412 ) # type: ignore 1413 1414 return AuditResult( 1415 audit=audit, 1416 audit_args=audit_args, 1417 model=snapshot.model_or_none, 1418 count=count, 1419 query=query, 1420 blocking=blocking, 1421 ) 1422 1423 def _create_catalogs( 1424 self, 1425 tables: t.Iterable[t.Union[exp.Table, str]], 1426 gateway: t.Optional[str] = None, 1427 ) -> None: 1428 # attempt to create catalogs for the virtual layer if possible 1429 adapter = self.get_adapter(gateway) 1430 if adapter.SUPPORTS_CREATE_DROP_CATALOG: 1431 unique_catalogs = {t.catalog for t in [exp.to_table(maybe_t) for maybe_t in tables]} 1432 for catalog_name in unique_catalogs: 1433 adapter.create_catalog(catalog_name) 1434 1435 def _create_schemas( 1436 self, 1437 gateway_table_pairs: t.Iterable[t.Tuple[t.Optional[str], t.Union[exp.Table, str]]], 1438 ) -> None: 1439 table_exprs = [(gateway, exp.to_table(t)) for gateway, t in gateway_table_pairs] 1440 unique_schemas = { 1441 (gateway, t.args["db"], t.args.get("catalog")) 1442 for gateway, t in table_exprs 1443 if t and t.db 1444 } 1445 1446 def _create_schema( 1447 gateway: t.Optional[str], schema_name: str, catalog: t.Optional[str] 1448 ) -> None: 1449 schema = schema_(schema_name, catalog) 1450 logger.info("Creating schema '%s'", schema) 1451 adapter = self.get_adapter(gateway) 1452 adapter.create_schema(schema) 1453 1454 with self.concurrent_context(): 1455 concurrent_apply_to_values( 1456 list(unique_schemas), 1457 lambda item: _create_schema(item[0], item[1], item[2]), 1458 self.ddl_concurrent_tasks, 1459 ) 1460 1461 def get_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: 1462 """Returns the adapter for the specified gateway or the default adapter if none is provided.""" 1463 if gateway: 1464 if adapter := self.adapters.get(gateway): 1465 return adapter 1466 raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") 1467 return self.adapter 1468 1469 def _execute_create( 1470 self, 1471 snapshot: Snapshot, 1472 table_name: str, 1473 is_table_deployable: bool, 1474 deployability_index: DeployabilityIndex, 1475 create_render_kwargs: t.Dict[str, t.Any], 1476 rendered_physical_properties: t.Dict[str, exp.Expr], 1477 dry_run: bool, 1478 run_pre_post_statements: bool = True, 1479 skip_grants: bool = False, 1480 ) -> None: 1481 adapter = self.get_adapter(snapshot.model.gateway) 1482 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 1483 1484 # It can still be useful for some strategies to know if the snapshot was actually deployable 1485 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 1486 is_snapshot_representative = deployability_index.is_representative(snapshot) 1487 1488 create_render_kwargs = { 1489 **create_render_kwargs, 1490 "table_mapping": {snapshot.name: table_name}, 1491 } 1492 if run_pre_post_statements: 1493 evaluation_strategy.run_pre_statements( 1494 snapshot=snapshot, 1495 render_kwargs={**create_render_kwargs, "inside_transaction": True}, 1496 ) 1497 evaluation_strategy.create( 1498 table_name=table_name, 1499 model=snapshot.model, 1500 is_table_deployable=is_table_deployable, 1501 skip_grants=skip_grants, 1502 render_kwargs=create_render_kwargs, 1503 is_snapshot_deployable=is_snapshot_deployable, 1504 is_snapshot_representative=is_snapshot_representative, 1505 dry_run=dry_run, 1506 physical_properties=rendered_physical_properties, 1507 snapshot=snapshot, 1508 deployability_index=deployability_index, 1509 ) 1510 if run_pre_post_statements: 1511 evaluation_strategy.run_post_statements( 1512 snapshot=snapshot, 1513 render_kwargs={**create_render_kwargs, "inside_transaction": True}, 1514 ) 1515 1516 def _can_clone(self, snapshot: Snapshot, deployability_index: DeployabilityIndex) -> bool: 1517 adapter = self.get_adapter(snapshot.model.gateway) 1518 return ( 1519 snapshot.is_forward_only 1520 and snapshot.is_materialized 1521 and bool(snapshot.previous_versions) 1522 and adapter.SUPPORTS_CLONING 1523 # managed models cannot have their schema mutated because they're based on queries, so clone + alter won't work 1524 and not snapshot.is_managed 1525 and not snapshot.is_dbt_custom 1526 and not deployability_index.is_deployable(snapshot) 1527 # If the deployable table is missing we can't clone it 1528 and adapter.table_exists(snapshot.table_name()) 1529 ) 1530 1531 def _get_physical_data_objects( 1532 self, 1533 target_snapshots: t.Iterable[Snapshot], 1534 deployability_index: DeployabilityIndex, 1535 ) -> t.Dict[SnapshotId, DataObject]: 1536 """Returns a dictionary of snapshot IDs to existing data objects of their physical tables. 1537 1538 Args: 1539 target_snapshots: Target snapshots. 1540 deployability_index: The deployability index to determine whether to look for a deployable or 1541 a non-deployable physical table. 1542 1543 Returns: 1544 A dictionary of snapshot IDs to existing data objects of their physical tables. If the data object 1545 for a snapshot is not found, it will not be included in the dictionary. 1546 """ 1547 return self._get_data_objects( 1548 target_snapshots, 1549 lambda s: exp.to_table( 1550 s.table_name(deployability_index.is_deployable(s)), dialect=s.model.dialect 1551 ), 1552 ) 1553 1554 def _get_virtual_data_objects( 1555 self, 1556 target_snapshots: t.Iterable[Snapshot], 1557 environment_naming_info: EnvironmentNamingInfo, 1558 ) -> t.Dict[SnapshotId, DataObject]: 1559 """Returns a dictionary of snapshot IDs to existing data objects of their virtual views. 1560 1561 Args: 1562 target_snapshots: Target snapshots. 1563 environment_naming_info: The environment naming info of the target virtual environment. 1564 1565 Returns: 1566 A dictionary of snapshot IDs to existing data objects of their virtual views. If the data object 1567 for a snapshot is not found, it will not be included in the dictionary. 1568 """ 1569 1570 def _get_view_name(s: Snapshot) -> exp.Table: 1571 adapter = ( 1572 self.get_adapter(s.model_gateway) 1573 if environment_naming_info.gateway_managed 1574 else self.adapter 1575 ) 1576 return exp.to_table( 1577 s.qualified_view_name.for_environment( 1578 environment_naming_info, dialect=adapter.dialect 1579 ), 1580 dialect=adapter.dialect, 1581 ) 1582 1583 return self._get_data_objects(target_snapshots, _get_view_name) 1584 1585 def _get_data_objects( 1586 self, 1587 target_snapshots: t.Iterable[Snapshot], 1588 table_name_callable: t.Callable[[Snapshot], exp.Table], 1589 ) -> t.Dict[SnapshotId, DataObject]: 1590 """Returns a dictionary of snapshot IDs to existing data objects. 1591 1592 Args: 1593 target_snapshots: Target snapshots. 1594 table_name_callable: A function that takes a snapshot and returns the table to look for. 1595 1596 Returns: 1597 A dictionary of snapshot IDs to existing data objects. If the data object for a snapshot is not found, 1598 it will not be included in the dictionary. 1599 """ 1600 tables_by_gateway_and_schema: t.Dict[t.Union[str, None], t.Dict[exp.Table, set[str]]] = ( 1601 defaultdict(lambda: defaultdict(set)) 1602 ) 1603 snapshots_by_table_name: t.Dict[exp.Table, t.Dict[str, Snapshot]] = defaultdict(dict) 1604 for snapshot in target_snapshots: 1605 if not snapshot.is_model or snapshot.is_symbolic: 1606 continue 1607 table = table_name_callable(snapshot) 1608 table_schema = d.schema_(table.db, catalog=table.catalog) 1609 tables_by_gateway_and_schema[snapshot.model_gateway][table_schema].add(table.name) 1610 snapshots_by_table_name[table_schema][table.name] = snapshot 1611 1612 def _get_data_objects_in_schema( 1613 schema: exp.Table, 1614 object_names: t.Optional[t.Set[str]] = None, 1615 gateway: t.Optional[str] = None, 1616 ) -> t.List[DataObject]: 1617 logger.info("Listing data objects in schema %s", schema.sql()) 1618 return self.get_adapter(gateway).get_data_objects( 1619 schema, object_names, safe_to_cache=True 1620 ) 1621 1622 with self.concurrent_context(): 1623 snapshot_id_to_obj: t.Dict[SnapshotId, DataObject] = {} 1624 # A schema can be shared across multiple engines, so we need to group tables by both gateway and schema 1625 for gateway, tables_by_schema in tables_by_gateway_and_schema.items(): 1626 schema_list = list(tables_by_schema.keys()) 1627 results = concurrent_apply_to_values( 1628 schema_list, 1629 lambda s: _get_data_objects_in_schema( 1630 schema=s, object_names=tables_by_schema.get(s), gateway=gateway 1631 ), 1632 self.ddl_concurrent_tasks, 1633 ) 1634 1635 for schema, objs in zip(schema_list, results): 1636 snapshots_by_name = snapshots_by_table_name.get(schema, {}) 1637 for obj in objs: 1638 if obj.name in snapshots_by_name: 1639 snapshot_id_to_obj[snapshots_by_name[obj.name].snapshot_id] = obj 1640 1641 return snapshot_id_to_obj
Evaluates a snapshot given runtime arguments through an arbitrary EngineAdapter.
The SnapshotEvaluator contains the business logic to generically evaluate a snapshot. It is responsible for delegating queries to the EngineAdapter. The SnapshotEvaluator does not directly communicate with the underlying execution engine.
Arguments:
- adapters: A single EngineAdapter or a dictionary of EngineAdapters where the key is the gateway name. When a dictionary is provided, and not an explicit default gateway its first item is treated as the default adapter and used for the virtual layer.
- ddl_concurrent_tasks: The number of concurrent tasks used for DDL operations (table / view creation, deletion, etc). Default: 1.
129 def __init__( 130 self, 131 adapters: EngineAdapter | t.Dict[str, EngineAdapter], 132 ddl_concurrent_tasks: int = 1, 133 selected_gateway: t.Optional[str] = None, 134 ): 135 self.adapters = ( 136 adapters if isinstance(adapters, t.Dict) else {selected_gateway or "": adapters} 137 ) 138 self.execution_tracker = QueryExecutionTracker() 139 self.adapters = { 140 gateway: adapter.with_settings(query_execution_tracker=self.execution_tracker) 141 for gateway, adapter in self.adapters.items() 142 } 143 self.adapter = ( 144 next(iter(self.adapters.values())) 145 if not selected_gateway 146 else self.adapters[selected_gateway] 147 ) 148 self.selected_gateway = selected_gateway 149 self.ddl_concurrent_tasks = ddl_concurrent_tasks
151 def evaluate( 152 self, 153 snapshot: Snapshot, 154 *, 155 start: TimeLike, 156 end: TimeLike, 157 execution_time: TimeLike, 158 snapshots: t.Dict[str, Snapshot], 159 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 160 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 161 deployability_index: t.Optional[DeployabilityIndex] = None, 162 batch_index: int = 0, 163 target_table_exists: t.Optional[bool] = None, 164 **kwargs: t.Any, 165 ) -> t.Optional[str]: 166 """Renders the snapshot's model, executes it and stores the result in the snapshot's physical table. 167 168 Args: 169 snapshot: Snapshot to evaluate. 170 start: The start datetime to render. 171 end: The end datetime to render. 172 execution_time: The date/time time reference to use for execution time. 173 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 174 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 175 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 176 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 177 batch_index: If the snapshot is part of a batch of related snapshots; which index in the batch is it 178 target_table_exists: Whether the target table exists. If None, the table will be checked for existence. 179 kwargs: Additional kwargs to pass to the renderer. 180 181 Returns: 182 The WAP ID of this evaluation if supported, None otherwise. 183 """ 184 with self.execution_tracker.track_execution( 185 SnapshotIdBatch(snapshot_id=snapshot.snapshot_id, batch_id=batch_index) 186 ): 187 result = self._evaluate_snapshot( 188 start=start, 189 end=end, 190 execution_time=execution_time, 191 snapshot=snapshot, 192 snapshots=snapshots, 193 allow_destructive_snapshots=allow_destructive_snapshots or set(), 194 allow_additive_snapshots=allow_additive_snapshots or set(), 195 deployability_index=deployability_index, 196 batch_index=batch_index, 197 target_table_exists=target_table_exists, 198 **kwargs, 199 ) 200 if result is None or isinstance(result, str): 201 return result 202 raise SQLMeshError( 203 f"Unexpected result {result} when evaluating snapshot {snapshot.snapshot_id}." 204 )
Renders the snapshot's model, executes it and stores the result in the snapshot's physical table.
Arguments:
- snapshot: Snapshot to evaluate.
- start: The start datetime to render.
- end: The end datetime to render.
- execution_time: The date/time time reference to use for execution time.
- snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations.
- allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed.
- allow_additive_snapshots: Snapshots for which additive schema changes are allowed.
- deployability_index: Determines snapshots that are deployable in the context of this evaluation.
- batch_index: If the snapshot is part of a batch of related snapshots; which index in the batch is it
- target_table_exists: Whether the target table exists. If None, the table will be checked for existence.
- kwargs: Additional kwargs to pass to the renderer.
Returns:
The WAP ID of this evaluation if supported, None otherwise.
206 def evaluate_and_fetch( 207 self, 208 snapshot: Snapshot, 209 *, 210 start: TimeLike, 211 end: TimeLike, 212 execution_time: TimeLike, 213 snapshots: t.Dict[str, Snapshot], 214 limit: int, 215 deployability_index: t.Optional[DeployabilityIndex] = None, 216 **kwargs: t.Any, 217 ) -> DF: 218 """Renders the snapshot's model, executes it and returns a dataframe with the result. 219 220 Args: 221 snapshot: Snapshot to evaluate. 222 start: The start datetime to render. 223 end: The end datetime to render. 224 execution_time: The date/time time reference to use for execution time. 225 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 226 limit: The maximum number of rows to fetch. 227 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 228 kwargs: Additional kwargs to pass to the renderer. 229 230 Returns: 231 The result of the evaluation as a dataframe. 232 """ 233 import pandas as pd 234 235 adapter = self.get_adapter(snapshot.model.gateway) 236 render_kwargs = dict( 237 start=start, 238 end=end, 239 execution_time=execution_time, 240 snapshot=snapshot, 241 runtime_stage=RuntimeStage.EVALUATING, 242 **kwargs, 243 ) 244 queries_or_dfs = self._render_snapshot_for_evaluation( 245 snapshot, 246 snapshots, 247 deployability_index or DeployabilityIndex.all_deployable(), 248 render_kwargs, 249 ) 250 query_or_df = next(queries_or_dfs) 251 if isinstance(query_or_df, pd.DataFrame): 252 return query_or_df.head(limit) 253 if not isinstance(query_or_df, exp.Expr): 254 # We assume that if this branch is reached, `query_or_df` is a pyspark / snowpark / bigframe dataframe, 255 # so we use `limit` instead of `head` to get back a dataframe instead of List[Row] 256 # https://spark.apache.org/docs/3.1.1/api/python/reference/api/pyspark.sql.DataFrame.head.html#pyspark.sql.DataFrame.head 257 return query_or_df.limit(limit) 258 259 assert isinstance(query_or_df, exp.Query) 260 261 existing_limit = query_or_df.args.get("limit") 262 if existing_limit: 263 limit = min(limit, execute(exp.select(existing_limit.expression)).rows[0][0]) 264 assert limit is not None 265 266 return adapter._fetch_native_df(query_or_df.limit(limit))
Renders the snapshot's model, executes it and returns a dataframe with the result.
Arguments:
- snapshot: Snapshot to evaluate.
- start: The start datetime to render.
- end: The end datetime to render.
- execution_time: The date/time time reference to use for execution time.
- snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations.
- limit: The maximum number of rows to fetch.
- deployability_index: Determines snapshots that are deployable in the context of this evaluation.
- kwargs: Additional kwargs to pass to the renderer.
Returns:
The result of the evaluation as a dataframe.
268 def promote( 269 self, 270 target_snapshots: t.Iterable[Snapshot], 271 environment_naming_info: EnvironmentNamingInfo, 272 deployability_index: t.Optional[DeployabilityIndex] = None, 273 start: t.Optional[TimeLike] = None, 274 end: t.Optional[TimeLike] = None, 275 execution_time: t.Optional[TimeLike] = None, 276 snapshots: t.Optional[t.Dict[SnapshotId, Snapshot]] = None, 277 table_mapping: t.Optional[t.Dict[str, str]] = None, 278 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 279 ) -> None: 280 """Promotes the given collection of snapshots in the target environment by replacing a corresponding 281 view with a physical table associated with the given snapshot. 282 283 Args: 284 target_snapshots: Snapshots to promote. 285 environment_naming_info: Naming information for the target environment. 286 deployability_index: Determines snapshots that are deployable in the context of this promotion. 287 on_complete: A callback to call on each successfully promoted snapshot. 288 """ 289 290 tables_by_gateway: t.Dict[t.Union[str, None], t.List[exp.Table]] = defaultdict(list) 291 for snapshot in target_snapshots: 292 if snapshot.is_model and not snapshot.is_symbolic: 293 gateway = ( 294 snapshot.model_gateway if environment_naming_info.gateway_managed else None 295 ) 296 adapter = self.get_adapter(gateway) 297 table = snapshot.qualified_view_name.table_for_environment( 298 environment_naming_info, dialect=adapter.dialect 299 ) 300 tables_by_gateway[gateway].append(table) 301 302 # A schema can be shared across multiple engines, so we need to group by gateway 303 for gateway, tables in tables_by_gateway.items(): 304 if environment_naming_info.suffix_target.is_catalog: 305 self._create_catalogs(tables=tables, gateway=gateway) 306 307 gateway_table_pairs = [ 308 (gateway, table) for gateway, tables in tables_by_gateway.items() for table in tables 309 ] 310 self._create_schemas(gateway_table_pairs=gateway_table_pairs) 311 312 # Fetch the view data objects for the promoted snapshots to get them cached 313 self._get_virtual_data_objects(target_snapshots, environment_naming_info) 314 315 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 316 with self.concurrent_context(): 317 concurrent_apply_to_snapshots( 318 target_snapshots, 319 lambda s: self._promote_snapshot( 320 s, 321 start=start, 322 end=end, 323 execution_time=execution_time, 324 snapshots=snapshots, 325 table_mapping=table_mapping, 326 environment_naming_info=environment_naming_info, 327 deployability_index=deployability_index, # type: ignore 328 on_complete=on_complete, 329 ), 330 self.ddl_concurrent_tasks, 331 )
Promotes the given collection of snapshots in the target environment by replacing a corresponding view with a physical table associated with the given snapshot.
Arguments:
- target_snapshots: Snapshots to promote.
- environment_naming_info: Naming information for the target environment.
- deployability_index: Determines snapshots that are deployable in the context of this promotion.
- on_complete: A callback to call on each successfully promoted snapshot.
333 def demote( 334 self, 335 target_snapshots: t.Iterable[Snapshot], 336 environment_naming_info: EnvironmentNamingInfo, 337 table_mapping: t.Optional[t.Dict[str, str]] = None, 338 deployability_index: t.Optional[DeployabilityIndex] = None, 339 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 340 ) -> None: 341 """Demotes the given collection of snapshots in the target environment by removing its view. 342 343 Args: 344 target_snapshots: Snapshots to demote. 345 environment_naming_info: Naming info for the target environment. 346 on_complete: A callback to call on each successfully demoted snapshot. 347 """ 348 with self.concurrent_context(): 349 concurrent_apply_to_snapshots( 350 target_snapshots, 351 lambda s: self._demote_snapshot( 352 s, 353 environment_naming_info, 354 deployability_index=deployability_index, 355 on_complete=on_complete, 356 table_mapping=table_mapping, 357 ), 358 self.ddl_concurrent_tasks, 359 )
Demotes the given collection of snapshots in the target environment by removing its view.
Arguments:
- target_snapshots: Snapshots to demote.
- environment_naming_info: Naming info for the target environment.
- on_complete: A callback to call on each successfully demoted snapshot.
361 def create( 362 self, 363 target_snapshots: t.Iterable[Snapshot], 364 snapshots: t.Dict[SnapshotId, Snapshot], 365 deployability_index: t.Optional[DeployabilityIndex] = None, 366 on_start: t.Optional[t.Callable] = None, 367 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 368 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 369 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 370 ) -> CompletionStatus: 371 """Creates a physical snapshot schema and table for the given collection of snapshots. 372 373 Args: 374 target_snapshots: Target snapshots. 375 snapshots: Mapping of snapshot ID to snapshot. 376 deployability_index: Determines snapshots that are deployable in the context of this creation. 377 on_start: A callback to initialize the snapshot creation progress bar. 378 on_complete: A callback to call on each successfully created snapshot. 379 allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes. 380 allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes. 381 382 Returns: 383 CompletionStatus: The status of the creation operation (success, failure, nothing to do). 384 """ 385 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 386 387 snapshots_to_create = self.get_snapshots_to_create(target_snapshots, deployability_index) 388 if not snapshots_to_create: 389 return CompletionStatus.NOTHING_TO_DO 390 if on_start: 391 on_start(snapshots_to_create) 392 393 self._create_snapshots( 394 snapshots_to_create=snapshots_to_create, 395 snapshots={s.name: s for s in snapshots.values()}, 396 deployability_index=deployability_index, 397 on_complete=on_complete, 398 allow_destructive_snapshots=allow_destructive_snapshots or set(), 399 allow_additive_snapshots=allow_additive_snapshots or set(), 400 ) 401 return CompletionStatus.SUCCESS
Creates a physical snapshot schema and table for the given collection of snapshots.
Arguments:
- target_snapshots: Target snapshots.
- snapshots: Mapping of snapshot ID to snapshot.
- deployability_index: Determines snapshots that are deployable in the context of this creation.
- on_start: A callback to initialize the snapshot creation progress bar.
- on_complete: A callback to call on each successfully created snapshot.
- allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes.
- allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes.
Returns:
CompletionStatus: The status of the creation operation (success, failure, nothing to do).
403 def create_physical_schemas( 404 self, snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex 405 ) -> None: 406 """Creates the physical schemas for the given snapshots. 407 408 Args: 409 snapshots: Snapshots to create physical schemas for. 410 deployability_index: Determines snapshots that are deployable in the context of this creation. 411 """ 412 tables_by_gateway: t.Dict[t.Optional[str], t.List[str]] = defaultdict(list) 413 for snapshot in snapshots: 414 if snapshot.is_model and not snapshot.is_symbolic: 415 tables_by_gateway[snapshot.model_gateway].append( 416 snapshot.table_name(is_deployable=deployability_index.is_deployable(snapshot)) 417 ) 418 419 gateway_table_pairs = [ 420 (gateway, table) for gateway, tables in tables_by_gateway.items() for table in tables 421 ] 422 self._create_schemas(gateway_table_pairs=gateway_table_pairs)
Creates the physical schemas for the given snapshots.
Arguments:
- snapshots: Snapshots to create physical schemas for.
- deployability_index: Determines snapshots that are deployable in the context of this creation.
424 def get_snapshots_to_create( 425 self, target_snapshots: t.Iterable[Snapshot], deployability_index: DeployabilityIndex 426 ) -> t.List[Snapshot]: 427 """Returns a list of snapshots that need to have their physical tables created. 428 429 Args: 430 target_snapshots: Target snapshots. 431 deployability_index: Determines snapshots that are deployable / representative in the context of this creation. 432 """ 433 existing_data_objects = self._get_physical_data_objects( 434 target_snapshots, deployability_index 435 ) 436 snapshots_to_create = [] 437 for snapshot in target_snapshots: 438 if not snapshot.is_model or snapshot.is_symbolic: 439 continue 440 if snapshot.snapshot_id not in existing_data_objects or ( 441 snapshot.is_seed and not snapshot.intervals 442 ): 443 snapshots_to_create.append(snapshot) 444 445 return snapshots_to_create
Returns a list of snapshots that need to have their physical tables created.
Arguments:
- target_snapshots: Target snapshots.
- deployability_index: Determines snapshots that are deployable / representative in the context of this creation.
474 def migrate( 475 self, 476 target_snapshots: t.Iterable[Snapshot], 477 snapshots: t.Dict[SnapshotId, Snapshot], 478 allow_destructive_snapshots: t.Optional[t.Set[str]] = None, 479 allow_additive_snapshots: t.Optional[t.Set[str]] = None, 480 deployability_index: t.Optional[DeployabilityIndex] = None, 481 ) -> None: 482 """Alters a physical snapshot table to match its snapshot's schema for the given collection of snapshots. 483 484 Args: 485 target_snapshots: Target snapshots. 486 snapshots: Mapping of snapshot ID to snapshot. 487 allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes. 488 allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes. 489 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 490 """ 491 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 492 target_data_objects = self._get_physical_data_objects(target_snapshots, deployability_index) 493 if not target_data_objects: 494 return 495 496 if not snapshots: 497 snapshots = {s.snapshot_id: s for s in target_snapshots} 498 499 allow_destructive_snapshots = allow_destructive_snapshots or set() 500 allow_additive_snapshots = allow_additive_snapshots or set() 501 snapshots_by_name = {s.name: s for s in snapshots.values()} 502 with self.concurrent_context(): 503 # Only migrate snapshots for which there's an existing data object 504 concurrent_apply_to_snapshots( 505 target_snapshots, 506 lambda s: self._migrate_snapshot( 507 s, 508 snapshots_by_name, 509 target_data_objects.get(s.snapshot_id), 510 allow_destructive_snapshots, 511 allow_additive_snapshots, 512 self.get_adapter(s.model_gateway), 513 deployability_index, 514 ), 515 self.ddl_concurrent_tasks, 516 )
Alters a physical snapshot table to match its snapshot's schema for the given collection of snapshots.
Arguments:
- target_snapshots: Target snapshots.
- snapshots: Mapping of snapshot ID to snapshot.
- allow_destructive_snapshots: Set of snapshots that are allowed to have destructive schema changes.
- allow_additive_snapshots: Set of snapshots that are allowed to have additive schema changes.
- deployability_index: Determines snapshots that are deployable in the context of this evaluation.
518 def cleanup( 519 self, 520 target_snapshots: t.Iterable[SnapshotTableCleanupTask], 521 on_complete: t.Optional[t.Callable[[str], None]] = None, 522 ) -> None: 523 """Cleans up the given snapshots by removing its table 524 525 Args: 526 target_snapshots: Snapshots to cleanup. 527 on_complete: A callback to call on each successfully deleted database object. 528 """ 529 target_snapshots = [ 530 t for t in target_snapshots if t.snapshot.is_model and not t.snapshot.is_symbolic 531 ] 532 snapshots_to_dev_table_only = { 533 t.snapshot.snapshot_id: t.dev_table_only for t in target_snapshots 534 } 535 with self.concurrent_context(): 536 concurrent_apply_to_snapshots( 537 [t.snapshot for t in target_snapshots], 538 lambda s: self._cleanup_snapshot( 539 s, 540 snapshots_to_dev_table_only[s.snapshot_id], 541 self.get_adapter(s.model_gateway), 542 on_complete, 543 ), 544 self.ddl_concurrent_tasks, 545 reverse_order=True, 546 )
Cleans up the given snapshots by removing its table
Arguments:
- target_snapshots: Snapshots to cleanup.
- on_complete: A callback to call on each successfully deleted database object.
548 def audit( 549 self, 550 snapshot: Snapshot, 551 *, 552 snapshots: t.Dict[str, Snapshot], 553 start: t.Optional[TimeLike] = None, 554 end: t.Optional[TimeLike] = None, 555 execution_time: t.Optional[TimeLike] = None, 556 deployability_index: t.Optional[DeployabilityIndex] = None, 557 wap_id: t.Optional[str] = None, 558 **kwargs: t.Any, 559 ) -> t.List[AuditResult]: 560 """Execute a snapshot's node's audit queries. 561 562 Args: 563 snapshot: Snapshot to evaluate. 564 snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations. 565 start: The start datetime to audit. Defaults to epoch start. 566 end: The end datetime to audit. Defaults to epoch start. 567 execution_time: The date/time time reference to use for execution time. 568 deployability_index: Determines snapshots that are deployable in the context of this evaluation. 569 wap_id: The WAP ID if applicable, None otherwise. 570 kwargs: Additional kwargs to pass to the renderer. 571 """ 572 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 573 adapter = self.get_adapter(snapshot.model_gateway) 574 575 if not snapshot.version: 576 raise ConfigError( 577 f"Cannot audit '{snapshot.name}' because it has not been versioned yet. Apply a plan first." 578 ) 579 580 if wap_id is not None: 581 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 582 original_table_name = snapshot.table_name( 583 is_deployable=deployability_index.is_deployable(snapshot) 584 ) 585 wap_table_name = adapter.wap_table_name(original_table_name, wap_id) 586 logger.info( 587 "Auditing WAP table '%s', snapshot %s", 588 wap_table_name, 589 snapshot.snapshot_id, 590 ) 591 592 table_mapping = kwargs.get("table_mapping") or {} 593 table_mapping[snapshot.name] = wap_table_name 594 kwargs["table_mapping"] = table_mapping 595 kwargs["this_model"] = exp.to_table(wap_table_name, dialect=adapter.dialect) 596 597 results = [] 598 599 audits_with_args = snapshot.node.audits_with_args 600 601 force_non_blocking = False 602 603 if audits_with_args: 604 logger.info("Auditing snapshot %s", snapshot.snapshot_id) 605 606 if not deployability_index.is_deployable(snapshot) and not adapter.SUPPORTS_CLONING: 607 # For dev preview tables that aren't based on clones of the production table, only a subset of the data is typically available 608 # However, users still expect audits to run anwyay. Some audits (such as row count) are practically guaranteed to fail 609 # when run on only a subset of data, so we switch all audits to non blocking and the user can decide if they still want to proceed 610 force_non_blocking = True 611 612 for audit, audit_args in audits_with_args: 613 if force_non_blocking: 614 # remove any blocking indicator on the model itself 615 audit_args.pop("blocking", None) 616 # so that we can fall back to the audit's setting, which we override to blocking: False 617 audit = audit.model_copy(update={"blocking": False}) 618 619 results.append( 620 self._audit( 621 audit=audit, 622 audit_args=audit_args, 623 snapshot=snapshot, 624 snapshots=snapshots, 625 start=start, 626 end=end, 627 execution_time=execution_time, 628 deployability_index=deployability_index, 629 **kwargs, 630 ) 631 ) 632 633 if wap_id is not None: 634 logger.info( 635 "Publishing evaluation results for snapshot %s, WAP ID '%s'", 636 snapshot.snapshot_id, 637 wap_id, 638 ) 639 self.wap_publish_snapshot(snapshot, wap_id, deployability_index) 640 641 return results
Execute a snapshot's node's audit queries.
Arguments:
- snapshot: Snapshot to evaluate.
- snapshots: All upstream snapshots (by name) to use for expansion and mapping of physical locations.
- start: The start datetime to audit. Defaults to epoch start.
- end: The end datetime to audit. Defaults to epoch start.
- execution_time: The date/time time reference to use for execution time.
- deployability_index: Determines snapshots that are deployable in the context of this evaluation.
- wap_id: The WAP ID if applicable, None otherwise.
- kwargs: Additional kwargs to pass to the renderer.
650 def recycle(self) -> None: 651 """Closes all open connections and releases all allocated resources associated with any thread 652 except the calling one.""" 653 try: 654 for adapter in self.adapters.values(): 655 adapter.recycle() 656 657 except Exception: 658 logger.exception("Failed to recycle Snapshot Evaluator")
Closes all open connections and releases all allocated resources associated with any thread except the calling one.
660 def close(self) -> None: 661 """Closes all open connections and releases all allocated resources.""" 662 try: 663 for adapter in self.adapters.values(): 664 adapter.close() 665 except Exception: 666 logger.exception("Failed to close Snapshot Evaluator")
Closes all open connections and releases all allocated resources.
668 def set_correlation_id(self, correlation_id: CorrelationId) -> SnapshotEvaluator: 669 return SnapshotEvaluator( 670 { 671 gateway: adapter.with_settings(correlation_id=correlation_id) 672 for gateway, adapter in self.adapters.items() 673 }, 674 self.ddl_concurrent_tasks, 675 self.selected_gateway, 676 )
849 def create_snapshot( 850 self, 851 snapshot: Snapshot, 852 snapshots: t.Dict[str, Snapshot], 853 deployability_index: DeployabilityIndex, 854 allow_destructive_snapshots: t.Set[str], 855 allow_additive_snapshots: t.Set[str], 856 on_complete: t.Optional[t.Callable[[SnapshotInfoLike], None]] = None, 857 ) -> None: 858 """Creates a physical table for the given snapshot. 859 860 Args: 861 snapshot: Snapshot to create. 862 snapshots: All upstream snapshots to use for expansion and mapping of physical locations. 863 deployability_index: Determines snapshots that are deployable in the context of this creation. 864 on_complete: A callback to call on each successfully created database object. 865 allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed. 866 allow_additive_snapshots: Snapshots for which additive schema changes are allowed. 867 """ 868 if not snapshot.is_model: 869 return 870 871 logger.info("Creating a physical table for snapshot %s", snapshot.snapshot_id) 872 873 adapter = self.get_adapter(snapshot.model.gateway) 874 create_render_kwargs: t.Dict[str, t.Any] = dict( 875 engine_adapter=adapter, 876 snapshots=snapshots, 877 runtime_stage=RuntimeStage.CREATING, 878 deployability_index=deployability_index, 879 ) 880 881 evaluation_strategy = _evaluation_strategy(snapshot, adapter) 882 evaluation_strategy.run_pre_statements( 883 snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} 884 ) 885 886 with ( 887 adapter.transaction(), 888 adapter.session(snapshot.model.render_session_properties(**create_render_kwargs)), 889 ): 890 rendered_physical_properties = snapshot.model.render_physical_properties( 891 **create_render_kwargs 892 ) 893 894 if self._can_clone(snapshot, deployability_index): 895 self._clone_snapshot_in_dev( 896 snapshot=snapshot, 897 snapshots=snapshots, 898 deployability_index=deployability_index, 899 render_kwargs=create_render_kwargs, 900 rendered_physical_properties=rendered_physical_properties, 901 allow_destructive_snapshots=allow_destructive_snapshots, 902 allow_additive_snapshots=allow_additive_snapshots, 903 run_pre_post_statements=True, 904 ) 905 else: 906 is_table_deployable = deployability_index.is_deployable(snapshot) 907 self._execute_create( 908 snapshot=snapshot, 909 table_name=snapshot.table_name(is_deployable=is_table_deployable), 910 is_table_deployable=is_table_deployable, 911 deployability_index=deployability_index, 912 create_render_kwargs=create_render_kwargs, 913 rendered_physical_properties=rendered_physical_properties, 914 dry_run=True, 915 ) 916 917 evaluation_strategy.run_post_statements( 918 snapshot=snapshot, render_kwargs={**create_render_kwargs, "inside_transaction": False} 919 ) 920 921 if on_complete is not None: 922 on_complete(snapshot)
Creates a physical table for the given snapshot.
Arguments:
- snapshot: Snapshot to create.
- snapshots: All upstream snapshots to use for expansion and mapping of physical locations.
- deployability_index: Determines snapshots that are deployable in the context of this creation.
- on_complete: A callback to call on each successfully created database object.
- allow_destructive_snapshots: Snapshots for which destructive schema changes are allowed.
- allow_additive_snapshots: Snapshots for which additive schema changes are allowed.
924 def wap_publish_snapshot( 925 self, 926 snapshot: Snapshot, 927 wap_id: str, 928 deployability_index: t.Optional[DeployabilityIndex], 929 ) -> None: 930 deployability_index = deployability_index or DeployabilityIndex.all_deployable() 931 table_name = snapshot.table_name(is_deployable=deployability_index.is_deployable(snapshot)) 932 adapter = self.get_adapter(snapshot.model_gateway) 933 adapter.wap_publish(table_name, wap_id)
1461 def get_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: 1462 """Returns the adapter for the specified gateway or the default adapter if none is provided.""" 1463 if gateway: 1464 if adapter := self.adapters.get(gateway): 1465 return adapter 1466 raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") 1467 return self.adapter
Returns the adapter for the specified gateway or the default adapter if none is provided.
1694class EvaluationStrategy(abc.ABC): 1695 def __init__(self, adapter: EngineAdapter): 1696 self.adapter = adapter 1697 1698 @abc.abstractmethod 1699 def insert( 1700 self, 1701 table_name: str, 1702 query_or_df: QueryOrDF, 1703 model: Model, 1704 is_first_insert: bool, 1705 render_kwargs: t.Dict[str, t.Any], 1706 **kwargs: t.Any, 1707 ) -> None: 1708 """Inserts the given query or a DataFrame into the target table or a view. 1709 1710 Args: 1711 table_name: The name of the target table or view. 1712 query_or_df: A query or a DataFrame to insert. 1713 model: The target model. 1714 is_first_insert: Whether this is the first insert for this version of a model. This value is set to True 1715 if no data has been previously inserted into the target table, or when the entire history of the target model has 1716 been restated. Note that in the latter case, the table might contain data from previous executions, and it is the 1717 responsibility of a specific evaluation strategy to handle the truncation of the table if necessary. 1718 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1719 """ 1720 1721 @abc.abstractmethod 1722 def append( 1723 self, 1724 table_name: str, 1725 query_or_df: QueryOrDF, 1726 model: Model, 1727 render_kwargs: t.Dict[str, t.Any], 1728 **kwargs: t.Any, 1729 ) -> None: 1730 """Appends the given query or a DataFrame to the existing table. 1731 1732 Args: 1733 table_name: The target table name. 1734 query_or_df: A query or a DataFrame to insert. 1735 model: The target model. 1736 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1737 """ 1738 1739 @abc.abstractmethod 1740 def create( 1741 self, 1742 table_name: str, 1743 model: Model, 1744 is_table_deployable: bool, 1745 render_kwargs: t.Dict[str, t.Any], 1746 skip_grants: bool, 1747 **kwargs: t.Any, 1748 ) -> None: 1749 """Creates the target table or view. 1750 1751 Note that the intention here is to just create the table structure, data is loaded in insert() and append() 1752 1753 Args: 1754 table_name: The name of a table or a view. 1755 model: The target model. 1756 is_table_deployable: True if this creation request is for the "main" table that *might* be deployed to a production environment. 1757 False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex 1758 which determines if the snapshot is deployable to production or not 1759 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1760 """ 1761 1762 @abc.abstractmethod 1763 def migrate( 1764 self, 1765 target_table_name: str, 1766 source_table_name: str, 1767 snapshot: Snapshot, 1768 *, 1769 ignore_destructive: bool, 1770 ignore_additive: bool, 1771 **kwargs: t.Any, 1772 ) -> None: 1773 """Migrates the target table schema so that it corresponds to the source table schema. 1774 1775 Args: 1776 target_table_name: The target table name. 1777 source_table_name: The source table name. 1778 snapshot: The target snapshot. 1779 ignore_destructive: If True, destructive changes are not created when migrating. 1780 This is used for forward-only models that are being migrated to a new version. 1781 ignore_additive: If True, additive changes are not created when migrating. 1782 This is used for forward-only models that are being migrated to a new version. 1783 """ 1784 1785 @abc.abstractmethod 1786 def delete(self, name: str, **kwargs: t.Any) -> None: 1787 """Deletes a target table or a view. 1788 1789 Args: 1790 name: The name of a table or a view. 1791 """ 1792 1793 @abc.abstractmethod 1794 def promote( 1795 self, 1796 table_name: str, 1797 view_name: str, 1798 model: Model, 1799 environment: str, 1800 **kwargs: t.Any, 1801 ) -> None: 1802 """Updates the target view to point to the target table. 1803 1804 Args: 1805 table_name: The name of a table in the physical layer that is being promoted. 1806 view_name: The name of the target view in the virtual layer. 1807 model: The model that is being promoted. 1808 environment: The name of the target environment. 1809 """ 1810 1811 @abc.abstractmethod 1812 def demote(self, view_name: str, **kwargs: t.Any) -> None: 1813 """Deletes the target view in the virtual layer. 1814 1815 Args: 1816 view_name: The name of the target view in the virtual layer. 1817 """ 1818 1819 @abc.abstractmethod 1820 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 1821 """Executes the snapshot's pre statements. 1822 1823 Args: 1824 snapshot: The target snapshot. 1825 render_kwargs: Additional key-value arguments to pass when rendering the statements. 1826 """ 1827 1828 @abc.abstractmethod 1829 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 1830 """Executes the snapshot's post statements. 1831 1832 Args: 1833 snapshot: The target snapshot. 1834 render_kwargs: Additional key-value arguments to pass when rendering the statements. 1835 """ 1836 1837 def _apply_grants( 1838 self, 1839 model: Model, 1840 table_name: str, 1841 target_layer: GrantsTargetLayer, 1842 is_snapshot_deployable: bool = False, 1843 ) -> None: 1844 """Apply grants for a model if grants are configured. 1845 1846 This method provides consistent grants application across all evaluation strategies. 1847 It ensures that whenever a physical database object (table, view, materialized view) 1848 is created or modified, the appropriate grants are applied. 1849 1850 Args: 1851 model: The SQLMesh model containing grants configuration 1852 table_name: The target table/view name to apply grants to 1853 target_layer: The grants application layer (physical or virtual) 1854 is_snapshot_deployable: Whether the snapshot is deployable (targeting production) 1855 """ 1856 grants_config = model.grants 1857 if grants_config is None: 1858 return 1859 1860 if not self.adapter.SUPPORTS_GRANTS: 1861 logger.warning( 1862 f"Engine {self.adapter.__class__.__name__} does not support grants. " 1863 f"Skipping grants application for model {model.name}" 1864 ) 1865 return 1866 1867 model_grants_target_layer = model.grants_target_layer 1868 deployable_vde_dev_only = ( 1869 is_snapshot_deployable and model.virtual_environment_mode.is_dev_only 1870 ) 1871 1872 # table_type is always a VIEW in the virtual layer unless model is deployable and VDE is dev_only 1873 # in which case we fall back to the model's model_grants_table_type 1874 if target_layer == GrantsTargetLayer.VIRTUAL and not deployable_vde_dev_only: 1875 model_grants_table_type = DataObjectType.VIEW 1876 else: 1877 model_grants_table_type = model.grants_table_type 1878 1879 if ( 1880 model_grants_target_layer.is_all 1881 or model_grants_target_layer == target_layer 1882 # Always apply grants in production when VDE is dev_only regardless of target_layer 1883 # since only physical tables are created in production 1884 or deployable_vde_dev_only 1885 ): 1886 logger.info(f"Applying grants for model {model.name} to table {table_name}") 1887 self.adapter.sync_grants_config( 1888 exp.to_table(table_name, dialect=self.adapter.dialect), 1889 grants_config, 1890 model_grants_table_type, 1891 ) 1892 else: 1893 logger.debug( 1894 f"Skipping grants application for model {model.name} in {target_layer} layer" 1895 )
Helper class that provides a standard way to create an ABC using inheritance.
1698 @abc.abstractmethod 1699 def insert( 1700 self, 1701 table_name: str, 1702 query_or_df: QueryOrDF, 1703 model: Model, 1704 is_first_insert: bool, 1705 render_kwargs: t.Dict[str, t.Any], 1706 **kwargs: t.Any, 1707 ) -> None: 1708 """Inserts the given query or a DataFrame into the target table or a view. 1709 1710 Args: 1711 table_name: The name of the target table or view. 1712 query_or_df: A query or a DataFrame to insert. 1713 model: The target model. 1714 is_first_insert: Whether this is the first insert for this version of a model. This value is set to True 1715 if no data has been previously inserted into the target table, or when the entire history of the target model has 1716 been restated. Note that in the latter case, the table might contain data from previous executions, and it is the 1717 responsibility of a specific evaluation strategy to handle the truncation of the table if necessary. 1718 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1719 """
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
1721 @abc.abstractmethod 1722 def append( 1723 self, 1724 table_name: str, 1725 query_or_df: QueryOrDF, 1726 model: Model, 1727 render_kwargs: t.Dict[str, t.Any], 1728 **kwargs: t.Any, 1729 ) -> None: 1730 """Appends the given query or a DataFrame to the existing table. 1731 1732 Args: 1733 table_name: The target table name. 1734 query_or_df: A query or a DataFrame to insert. 1735 model: The target model. 1736 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1737 """
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
1739 @abc.abstractmethod 1740 def create( 1741 self, 1742 table_name: str, 1743 model: Model, 1744 is_table_deployable: bool, 1745 render_kwargs: t.Dict[str, t.Any], 1746 skip_grants: bool, 1747 **kwargs: t.Any, 1748 ) -> None: 1749 """Creates the target table or view. 1750 1751 Note that the intention here is to just create the table structure, data is loaded in insert() and append() 1752 1753 Args: 1754 table_name: The name of a table or a view. 1755 model: The target model. 1756 is_table_deployable: True if this creation request is for the "main" table that *might* be deployed to a production environment. 1757 False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex 1758 which determines if the snapshot is deployable to production or not 1759 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 1760 """
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
1762 @abc.abstractmethod 1763 def migrate( 1764 self, 1765 target_table_name: str, 1766 source_table_name: str, 1767 snapshot: Snapshot, 1768 *, 1769 ignore_destructive: bool, 1770 ignore_additive: bool, 1771 **kwargs: t.Any, 1772 ) -> None: 1773 """Migrates the target table schema so that it corresponds to the source table schema. 1774 1775 Args: 1776 target_table_name: The target table name. 1777 source_table_name: The source table name. 1778 snapshot: The target snapshot. 1779 ignore_destructive: If True, destructive changes are not created when migrating. 1780 This is used for forward-only models that are being migrated to a new version. 1781 ignore_additive: If True, additive changes are not created when migrating. 1782 This is used for forward-only models that are being migrated to a new version. 1783 """
Migrates the target table schema so that it corresponds to the source table schema.
Arguments:
- target_table_name: The target table name.
- source_table_name: The source table name.
- snapshot: The target snapshot.
- ignore_destructive: If True, destructive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
- ignore_additive: If True, additive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
1785 @abc.abstractmethod 1786 def delete(self, name: str, **kwargs: t.Any) -> None: 1787 """Deletes a target table or a view. 1788 1789 Args: 1790 name: The name of a table or a view. 1791 """
Deletes a target table or a view.
Arguments:
- name: The name of a table or a view.
1793 @abc.abstractmethod 1794 def promote( 1795 self, 1796 table_name: str, 1797 view_name: str, 1798 model: Model, 1799 environment: str, 1800 **kwargs: t.Any, 1801 ) -> None: 1802 """Updates the target view to point to the target table. 1803 1804 Args: 1805 table_name: The name of a table in the physical layer that is being promoted. 1806 view_name: The name of the target view in the virtual layer. 1807 model: The model that is being promoted. 1808 environment: The name of the target environment. 1809 """
Updates the target view to point to the target table.
Arguments:
- table_name: The name of a table in the physical layer that is being promoted.
- view_name: The name of the target view in the virtual layer.
- model: The model that is being promoted.
- environment: The name of the target environment.
1811 @abc.abstractmethod 1812 def demote(self, view_name: str, **kwargs: t.Any) -> None: 1813 """Deletes the target view in the virtual layer. 1814 1815 Args: 1816 view_name: The name of the target view in the virtual layer. 1817 """
Deletes the target view in the virtual layer.
Arguments:
- view_name: The name of the target view in the virtual layer.
1819 @abc.abstractmethod 1820 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 1821 """Executes the snapshot's pre statements. 1822 1823 Args: 1824 snapshot: The target snapshot. 1825 render_kwargs: Additional key-value arguments to pass when rendering the statements. 1826 """
Executes the snapshot's pre statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
1828 @abc.abstractmethod 1829 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 1830 """Executes the snapshot's post statements. 1831 1832 Args: 1833 snapshot: The target snapshot. 1834 render_kwargs: Additional key-value arguments to pass when rendering the statements. 1835 """
Executes the snapshot's post statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
1898class SymbolicStrategy(EvaluationStrategy): 1899 def insert( 1900 self, 1901 table_name: str, 1902 query_or_df: QueryOrDF, 1903 model: Model, 1904 is_first_insert: bool, 1905 render_kwargs: t.Dict[str, t.Any], 1906 **kwargs: t.Any, 1907 ) -> None: 1908 pass 1909 1910 def append( 1911 self, 1912 table_name: str, 1913 query_or_df: QueryOrDF, 1914 model: Model, 1915 render_kwargs: t.Dict[str, t.Any], 1916 **kwargs: t.Any, 1917 ) -> None: 1918 pass 1919 1920 def create( 1921 self, 1922 table_name: str, 1923 model: Model, 1924 is_table_deployable: bool, 1925 render_kwargs: t.Dict[str, t.Any], 1926 skip_grants: bool, 1927 **kwargs: t.Any, 1928 ) -> None: 1929 pass 1930 1931 def migrate( 1932 self, 1933 target_table_name: str, 1934 source_table_name: str, 1935 snapshot: Snapshot, 1936 *, 1937 ignore_destructive: bool, 1938 ignore_additive: bool, 1939 **kwarg: t.Any, 1940 ) -> None: 1941 pass 1942 1943 def delete(self, name: str, **kwargs: t.Any) -> None: 1944 pass 1945 1946 def promote( 1947 self, 1948 table_name: str, 1949 view_name: str, 1950 model: Model, 1951 environment: str, 1952 **kwargs: t.Any, 1953 ) -> None: 1954 pass 1955 1956 def demote(self, view_name: str, **kwargs: t.Any) -> None: 1957 pass 1958 1959 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: 1960 pass 1961 1962 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: 1963 pass
Helper class that provides a standard way to create an ABC using inheritance.
1899 def insert( 1900 self, 1901 table_name: str, 1902 query_or_df: QueryOrDF, 1903 model: Model, 1904 is_first_insert: bool, 1905 render_kwargs: t.Dict[str, t.Any], 1906 **kwargs: t.Any, 1907 ) -> None: 1908 pass
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
1910 def append( 1911 self, 1912 table_name: str, 1913 query_or_df: QueryOrDF, 1914 model: Model, 1915 render_kwargs: t.Dict[str, t.Any], 1916 **kwargs: t.Any, 1917 ) -> None: 1918 pass
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
1920 def create( 1921 self, 1922 table_name: str, 1923 model: Model, 1924 is_table_deployable: bool, 1925 render_kwargs: t.Dict[str, t.Any], 1926 skip_grants: bool, 1927 **kwargs: t.Any, 1928 ) -> None: 1929 pass
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
1931 def migrate( 1932 self, 1933 target_table_name: str, 1934 source_table_name: str, 1935 snapshot: Snapshot, 1936 *, 1937 ignore_destructive: bool, 1938 ignore_additive: bool, 1939 **kwarg: t.Any, 1940 ) -> None: 1941 pass
Migrates the target table schema so that it corresponds to the source table schema.
Arguments:
- target_table_name: The target table name.
- source_table_name: The source table name.
- snapshot: The target snapshot.
- ignore_destructive: If True, destructive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
- ignore_additive: If True, additive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
Deletes a target table or a view.
Arguments:
- name: The name of a table or a view.
1946 def promote( 1947 self, 1948 table_name: str, 1949 view_name: str, 1950 model: Model, 1951 environment: str, 1952 **kwargs: t.Any, 1953 ) -> None: 1954 pass
Updates the target view to point to the target table.
Arguments:
- table_name: The name of a table in the physical layer that is being promoted.
- view_name: The name of the target view in the virtual layer.
- model: The model that is being promoted.
- environment: The name of the target environment.
Deletes the target view in the virtual layer.
Arguments:
- view_name: The name of the target view in the virtual layer.
1959 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: 1960 pass
Executes the snapshot's pre statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
1962 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Dict[str, t.Any]) -> None: 1963 pass
Executes the snapshot's post statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
Inherited Members
1966class EmbeddedStrategy(SymbolicStrategy): 1967 def promote( 1968 self, 1969 table_name: str, 1970 view_name: str, 1971 model: Model, 1972 environment: str, 1973 **kwargs: t.Any, 1974 ) -> None: 1975 logger.info("Dropping view '%s' for non-materialized table", view_name) 1976 self.adapter.drop_view(view_name, cascade=False)
Helper class that provides a standard way to create an ABC using inheritance.
1967 def promote( 1968 self, 1969 table_name: str, 1970 view_name: str, 1971 model: Model, 1972 environment: str, 1973 **kwargs: t.Any, 1974 ) -> None: 1975 logger.info("Dropping view '%s' for non-materialized table", view_name) 1976 self.adapter.drop_view(view_name, cascade=False)
Updates the target view to point to the target table.
Arguments:
- table_name: The name of a table in the physical layer that is being promoted.
- view_name: The name of the target view in the virtual layer.
- model: The model that is being promoted.
- environment: The name of the target environment.
1979class PromotableStrategy(EvaluationStrategy, abc.ABC): 1980 def promote( 1981 self, 1982 table_name: str, 1983 view_name: str, 1984 model: Model, 1985 environment: str, 1986 **kwargs: t.Any, 1987 ) -> None: 1988 is_prod = environment == c.PROD 1989 logger.info("Updating view '%s' to point at table '%s'", view_name, table_name) 1990 render_kwargs: t.Dict[str, t.Any] = dict( 1991 start=kwargs.get("start"), 1992 end=kwargs.get("end"), 1993 execution_time=kwargs.get("execution_time"), 1994 engine_adapter=kwargs.get("engine_adapter"), 1995 snapshots=kwargs.get("snapshots"), 1996 deployability_index=kwargs.get("deployability_index"), 1997 table_mapping=kwargs.get("table_mapping"), 1998 runtime_stage=kwargs.get("runtime_stage"), 1999 ) 2000 self.adapter.create_view( 2001 view_name, 2002 exp.select("*").from_(table_name, dialect=self.adapter.dialect), 2003 table_description=model.description if is_prod else None, 2004 column_descriptions=model.column_descriptions if is_prod else None, 2005 view_properties=model.render_virtual_properties(**render_kwargs), 2006 ) 2007 2008 snapshot = kwargs.get("snapshot") 2009 deployability_index = kwargs.get("deployability_index") 2010 is_snapshot_deployable = ( 2011 deployability_index.is_deployable(snapshot) 2012 if snapshot and deployability_index 2013 else False 2014 ) 2015 2016 # Apply grants to the virtual layer (view) after promotion 2017 self._apply_grants(model, view_name, GrantsTargetLayer.VIRTUAL, is_snapshot_deployable) 2018 2019 def demote(self, view_name: str, **kwargs: t.Any) -> None: 2020 logger.info("Dropping view '%s'", view_name) 2021 self.adapter.drop_view(view_name, cascade=False) 2022 2023 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 2024 self.adapter.execute(snapshot.model.render_pre_statements(**render_kwargs)) 2025 2026 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 2027 self.adapter.execute(snapshot.model.render_post_statements(**render_kwargs))
Helper class that provides a standard way to create an ABC using inheritance.
1980 def promote( 1981 self, 1982 table_name: str, 1983 view_name: str, 1984 model: Model, 1985 environment: str, 1986 **kwargs: t.Any, 1987 ) -> None: 1988 is_prod = environment == c.PROD 1989 logger.info("Updating view '%s' to point at table '%s'", view_name, table_name) 1990 render_kwargs: t.Dict[str, t.Any] = dict( 1991 start=kwargs.get("start"), 1992 end=kwargs.get("end"), 1993 execution_time=kwargs.get("execution_time"), 1994 engine_adapter=kwargs.get("engine_adapter"), 1995 snapshots=kwargs.get("snapshots"), 1996 deployability_index=kwargs.get("deployability_index"), 1997 table_mapping=kwargs.get("table_mapping"), 1998 runtime_stage=kwargs.get("runtime_stage"), 1999 ) 2000 self.adapter.create_view( 2001 view_name, 2002 exp.select("*").from_(table_name, dialect=self.adapter.dialect), 2003 table_description=model.description if is_prod else None, 2004 column_descriptions=model.column_descriptions if is_prod else None, 2005 view_properties=model.render_virtual_properties(**render_kwargs), 2006 ) 2007 2008 snapshot = kwargs.get("snapshot") 2009 deployability_index = kwargs.get("deployability_index") 2010 is_snapshot_deployable = ( 2011 deployability_index.is_deployable(snapshot) 2012 if snapshot and deployability_index 2013 else False 2014 ) 2015 2016 # Apply grants to the virtual layer (view) after promotion 2017 self._apply_grants(model, view_name, GrantsTargetLayer.VIRTUAL, is_snapshot_deployable)
Updates the target view to point to the target table.
Arguments:
- table_name: The name of a table in the physical layer that is being promoted.
- view_name: The name of the target view in the virtual layer.
- model: The model that is being promoted.
- environment: The name of the target environment.
2019 def demote(self, view_name: str, **kwargs: t.Any) -> None: 2020 logger.info("Dropping view '%s'", view_name) 2021 self.adapter.drop_view(view_name, cascade=False)
Deletes the target view in the virtual layer.
Arguments:
- view_name: The name of the target view in the virtual layer.
2023 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 2024 self.adapter.execute(snapshot.model.render_pre_statements(**render_kwargs))
Executes the snapshot's pre statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
2026 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 2027 self.adapter.execute(snapshot.model.render_post_statements(**render_kwargs))
Executes the snapshot's post statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
2030class MaterializableStrategy(PromotableStrategy, abc.ABC): 2031 def create( 2032 self, 2033 table_name: str, 2034 model: Model, 2035 is_table_deployable: bool, 2036 render_kwargs: t.Dict[str, t.Any], 2037 skip_grants: bool, 2038 **kwargs: t.Any, 2039 ) -> None: 2040 ctas_query = model.ctas_query(**render_kwargs) 2041 physical_properties = kwargs.get("physical_properties", model.physical_properties) 2042 2043 logger.info("Creating table '%s'", table_name) 2044 if model.annotated: 2045 self.adapter.create_table( 2046 table_name, 2047 target_columns_to_types=model.columns_to_types_or_raise, 2048 table_format=model.table_format, 2049 storage_format=model.storage_format, 2050 partitioned_by=model.partitioned_by, 2051 partition_interval_unit=model.partition_interval_unit, 2052 clustered_by=model.clustered_by, 2053 table_properties=physical_properties, 2054 table_description=model.description if is_table_deployable else None, 2055 column_descriptions=model.column_descriptions if is_table_deployable else None, 2056 ) 2057 2058 # If we create both temp and prod tables, we need to make sure that we dry run once. 2059 dry_run = kwargs.get("dry_run", True) or not is_table_deployable 2060 2061 # Only sql models have queries that can be tested. 2062 # We also need to make sure that we don't dry run on Redshift because its planner / optimizer sometimes 2063 # breaks on our CTAS queries due to us relying on the WHERE FALSE LIMIT 0 combo. 2064 if model.is_sql and dry_run and self.adapter.dialect != "redshift": 2065 logger.info("Dry running model '%s'", model.name) 2066 self.adapter.fetchall(ctas_query) 2067 else: 2068 self.adapter.ctas( 2069 table_name, 2070 ctas_query, 2071 model.columns_to_types, 2072 table_format=model.table_format, 2073 storage_format=model.storage_format, 2074 partitioned_by=model.partitioned_by, 2075 partition_interval_unit=model.partition_interval_unit, 2076 clustered_by=model.clustered_by, 2077 table_properties=physical_properties, 2078 table_description=model.description if is_table_deployable else None, 2079 column_descriptions=model.column_descriptions if is_table_deployable else None, 2080 ) 2081 2082 # Apply grants after table creation (unless explicitly skipped by caller) 2083 if not skip_grants: 2084 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2085 self._apply_grants( 2086 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2087 ) 2088 2089 def migrate( 2090 self, 2091 target_table_name: str, 2092 source_table_name: str, 2093 snapshot: Snapshot, 2094 *, 2095 ignore_destructive: bool, 2096 ignore_additive: bool, 2097 **kwargs: t.Any, 2098 ) -> None: 2099 logger.info(f"Altering table '{target_table_name}'") 2100 alter_operations = self.adapter.get_alter_operations( 2101 target_table_name, 2102 source_table_name, 2103 ignore_destructive=ignore_destructive, 2104 ignore_additive=ignore_additive, 2105 ) 2106 _check_destructive_schema_change( 2107 snapshot, alter_operations, kwargs["allow_destructive_snapshots"] 2108 ) 2109 _check_additive_schema_change( 2110 snapshot, alter_operations, kwargs["allow_additive_snapshots"] 2111 ) 2112 self.adapter.alter_table(alter_operations) 2113 2114 # Apply grants after schema migration 2115 deployability_index = kwargs.get("deployability_index") 2116 is_snapshot_deployable = ( 2117 deployability_index.is_deployable(snapshot) if deployability_index else False 2118 ) 2119 self._apply_grants( 2120 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2121 ) 2122 2123 def delete(self, name: str, **kwargs: t.Any) -> None: 2124 _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 2125 self.adapter.drop_table(name, cascade=kwargs.pop("cascade", False)) 2126 logger.info("Dropped table '%s'", name) 2127 2128 def _replace_query_for_model( 2129 self, 2130 model: Model, 2131 name: str, 2132 query_or_df: QueryOrDF, 2133 render_kwargs: t.Dict[str, t.Any], 2134 skip_grants: bool = False, 2135 **kwargs: t.Any, 2136 ) -> None: 2137 """Replaces the table for the given model. 2138 2139 Args: 2140 model: The target model. 2141 name: The name of the target table. 2142 query_or_df: The query or DataFrame to replace the target table with. 2143 """ 2144 if (model.is_seed or model.kind.is_full) and model.annotated: 2145 columns_to_types = model.columns_to_types_or_raise 2146 source_columns: t.Optional[t.List[str]] = list(columns_to_types) 2147 else: 2148 try: 2149 # Source columns from the underlying table to prevent unintentional table schema changes during restatement of incremental models. 2150 columns_to_types, source_columns = self._get_target_and_source_columns( 2151 model, name, render_kwargs, force_get_columns_from_target=True 2152 ) 2153 except Exception: 2154 columns_to_types, source_columns = None, None 2155 2156 self.adapter.replace_query( 2157 name, 2158 query_or_df, 2159 table_format=model.table_format, 2160 storage_format=model.storage_format, 2161 partitioned_by=model.partitioned_by, 2162 partition_interval_unit=model.partition_interval_unit, 2163 clustered_by=model.clustered_by, 2164 table_properties=kwargs.get("physical_properties", model.physical_properties), 2165 table_description=model.description, 2166 column_descriptions=model.column_descriptions, 2167 target_columns_to_types=columns_to_types, 2168 source_columns=source_columns, 2169 ) 2170 2171 # Apply grants after table replacement (unless explicitly skipped by caller) 2172 if not skip_grants: 2173 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2174 self._apply_grants(model, name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) 2175 2176 def _get_target_and_source_columns( 2177 self, 2178 model: Model, 2179 table_name: str, 2180 render_kwargs: t.Dict[str, t.Any], 2181 force_get_columns_from_target: bool = False, 2182 ) -> t.Tuple[t.Dict[str, exp.DataType], t.Optional[t.List[str]]]: 2183 if force_get_columns_from_target: 2184 target_column_to_types = self.adapter.columns(table_name) 2185 else: 2186 target_column_to_types = ( 2187 model.columns_to_types # type: ignore 2188 if model.annotated 2189 and not model.on_destructive_change.is_ignore 2190 and not model.on_additive_change.is_ignore 2191 else self.adapter.columns(table_name) 2192 ) 2193 assert target_column_to_types is not None 2194 if model.on_destructive_change.is_ignore or model.on_additive_change.is_ignore: 2195 # We need to identify the columns that are only in the source so we create an empty table with 2196 # the user query to determine that 2197 temp_table_name = exp.table_( 2198 "diff", 2199 db=model.physical_schema, 2200 ) 2201 with self.adapter.temp_table( 2202 model.ctas_query(**render_kwargs), name=temp_table_name 2203 ) as temp_table: 2204 source_columns = list(self.adapter.columns(temp_table)) 2205 else: 2206 source_columns = None 2207 return target_column_to_types, source_columns
Helper class that provides a standard way to create an ABC using inheritance.
2031 def create( 2032 self, 2033 table_name: str, 2034 model: Model, 2035 is_table_deployable: bool, 2036 render_kwargs: t.Dict[str, t.Any], 2037 skip_grants: bool, 2038 **kwargs: t.Any, 2039 ) -> None: 2040 ctas_query = model.ctas_query(**render_kwargs) 2041 physical_properties = kwargs.get("physical_properties", model.physical_properties) 2042 2043 logger.info("Creating table '%s'", table_name) 2044 if model.annotated: 2045 self.adapter.create_table( 2046 table_name, 2047 target_columns_to_types=model.columns_to_types_or_raise, 2048 table_format=model.table_format, 2049 storage_format=model.storage_format, 2050 partitioned_by=model.partitioned_by, 2051 partition_interval_unit=model.partition_interval_unit, 2052 clustered_by=model.clustered_by, 2053 table_properties=physical_properties, 2054 table_description=model.description if is_table_deployable else None, 2055 column_descriptions=model.column_descriptions if is_table_deployable else None, 2056 ) 2057 2058 # If we create both temp and prod tables, we need to make sure that we dry run once. 2059 dry_run = kwargs.get("dry_run", True) or not is_table_deployable 2060 2061 # Only sql models have queries that can be tested. 2062 # We also need to make sure that we don't dry run on Redshift because its planner / optimizer sometimes 2063 # breaks on our CTAS queries due to us relying on the WHERE FALSE LIMIT 0 combo. 2064 if model.is_sql and dry_run and self.adapter.dialect != "redshift": 2065 logger.info("Dry running model '%s'", model.name) 2066 self.adapter.fetchall(ctas_query) 2067 else: 2068 self.adapter.ctas( 2069 table_name, 2070 ctas_query, 2071 model.columns_to_types, 2072 table_format=model.table_format, 2073 storage_format=model.storage_format, 2074 partitioned_by=model.partitioned_by, 2075 partition_interval_unit=model.partition_interval_unit, 2076 clustered_by=model.clustered_by, 2077 table_properties=physical_properties, 2078 table_description=model.description if is_table_deployable else None, 2079 column_descriptions=model.column_descriptions if is_table_deployable else None, 2080 ) 2081 2082 # Apply grants after table creation (unless explicitly skipped by caller) 2083 if not skip_grants: 2084 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2085 self._apply_grants( 2086 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2087 )
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2089 def migrate( 2090 self, 2091 target_table_name: str, 2092 source_table_name: str, 2093 snapshot: Snapshot, 2094 *, 2095 ignore_destructive: bool, 2096 ignore_additive: bool, 2097 **kwargs: t.Any, 2098 ) -> None: 2099 logger.info(f"Altering table '{target_table_name}'") 2100 alter_operations = self.adapter.get_alter_operations( 2101 target_table_name, 2102 source_table_name, 2103 ignore_destructive=ignore_destructive, 2104 ignore_additive=ignore_additive, 2105 ) 2106 _check_destructive_schema_change( 2107 snapshot, alter_operations, kwargs["allow_destructive_snapshots"] 2108 ) 2109 _check_additive_schema_change( 2110 snapshot, alter_operations, kwargs["allow_additive_snapshots"] 2111 ) 2112 self.adapter.alter_table(alter_operations) 2113 2114 # Apply grants after schema migration 2115 deployability_index = kwargs.get("deployability_index") 2116 is_snapshot_deployable = ( 2117 deployability_index.is_deployable(snapshot) if deployability_index else False 2118 ) 2119 self._apply_grants( 2120 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2121 )
Migrates the target table schema so that it corresponds to the source table schema.
Arguments:
- target_table_name: The target table name.
- source_table_name: The source table name.
- snapshot: The target snapshot.
- ignore_destructive: If True, destructive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
- ignore_additive: If True, additive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
2123 def delete(self, name: str, **kwargs: t.Any) -> None: 2124 _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 2125 self.adapter.drop_table(name, cascade=kwargs.pop("cascade", False)) 2126 logger.info("Dropped table '%s'", name)
Deletes a target table or a view.
Arguments:
- name: The name of a table or a view.
2210class IncrementalStrategy(MaterializableStrategy, abc.ABC): 2211 def append( 2212 self, 2213 table_name: str, 2214 query_or_df: QueryOrDF, 2215 model: Model, 2216 render_kwargs: t.Dict[str, t.Any], 2217 **kwargs: t.Any, 2218 ) -> None: 2219 columns_to_types, source_columns = self._get_target_and_source_columns( 2220 model, table_name, render_kwargs=render_kwargs 2221 ) 2222 self.adapter.insert_append( 2223 table_name, 2224 query_or_df, 2225 target_columns_to_types=columns_to_types, 2226 source_columns=source_columns, 2227 )
Helper class that provides a standard way to create an ABC using inheritance.
2211 def append( 2212 self, 2213 table_name: str, 2214 query_or_df: QueryOrDF, 2215 model: Model, 2216 render_kwargs: t.Dict[str, t.Any], 2217 **kwargs: t.Any, 2218 ) -> None: 2219 columns_to_types, source_columns = self._get_target_and_source_columns( 2220 model, table_name, render_kwargs=render_kwargs 2221 ) 2222 self.adapter.insert_append( 2223 table_name, 2224 query_or_df, 2225 target_columns_to_types=columns_to_types, 2226 source_columns=source_columns, 2227 )
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2230class IncrementalByPartitionStrategy(IncrementalStrategy): 2231 def insert( 2232 self, 2233 table_name: str, 2234 query_or_df: QueryOrDF, 2235 model: Model, 2236 is_first_insert: bool, 2237 render_kwargs: t.Dict[str, t.Any], 2238 **kwargs: t.Any, 2239 ) -> None: 2240 if is_first_insert: 2241 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2242 else: 2243 columns_to_types, source_columns = self._get_target_and_source_columns( 2244 model, table_name, render_kwargs=render_kwargs 2245 ) 2246 self.adapter.insert_overwrite_by_partition( 2247 table_name, 2248 query_or_df, 2249 partitioned_by=model.partitioned_by, 2250 target_columns_to_types=columns_to_types, 2251 source_columns=source_columns, 2252 )
Helper class that provides a standard way to create an ABC using inheritance.
2231 def insert( 2232 self, 2233 table_name: str, 2234 query_or_df: QueryOrDF, 2235 model: Model, 2236 is_first_insert: bool, 2237 render_kwargs: t.Dict[str, t.Any], 2238 **kwargs: t.Any, 2239 ) -> None: 2240 if is_first_insert: 2241 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2242 else: 2243 columns_to_types, source_columns = self._get_target_and_source_columns( 2244 model, table_name, render_kwargs=render_kwargs 2245 ) 2246 self.adapter.insert_overwrite_by_partition( 2247 table_name, 2248 query_or_df, 2249 partitioned_by=model.partitioned_by, 2250 target_columns_to_types=columns_to_types, 2251 source_columns=source_columns, 2252 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2255class IncrementalByTimeRangeStrategy(IncrementalStrategy): 2256 def insert( 2257 self, 2258 table_name: str, 2259 query_or_df: QueryOrDF, 2260 model: Model, 2261 is_first_insert: bool, 2262 render_kwargs: t.Dict[str, t.Any], 2263 **kwargs: t.Any, 2264 ) -> None: 2265 assert model.time_column 2266 columns_to_types, source_columns = self._get_target_and_source_columns( 2267 model, table_name, render_kwargs=render_kwargs 2268 ) 2269 self.adapter.insert_overwrite_by_time_partition( 2270 table_name, 2271 query_or_df, 2272 time_formatter=model.convert_to_time_column, 2273 time_column=model.time_column, 2274 target_columns_to_types=columns_to_types, 2275 source_columns=source_columns, 2276 **kwargs, 2277 )
Helper class that provides a standard way to create an ABC using inheritance.
2256 def insert( 2257 self, 2258 table_name: str, 2259 query_or_df: QueryOrDF, 2260 model: Model, 2261 is_first_insert: bool, 2262 render_kwargs: t.Dict[str, t.Any], 2263 **kwargs: t.Any, 2264 ) -> None: 2265 assert model.time_column 2266 columns_to_types, source_columns = self._get_target_and_source_columns( 2267 model, table_name, render_kwargs=render_kwargs 2268 ) 2269 self.adapter.insert_overwrite_by_time_partition( 2270 table_name, 2271 query_or_df, 2272 time_formatter=model.convert_to_time_column, 2273 time_column=model.time_column, 2274 target_columns_to_types=columns_to_types, 2275 source_columns=source_columns, 2276 **kwargs, 2277 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2280class IncrementalByUniqueKeyStrategy(IncrementalStrategy): 2281 def insert( 2282 self, 2283 table_name: str, 2284 query_or_df: QueryOrDF, 2285 model: Model, 2286 is_first_insert: bool, 2287 render_kwargs: t.Dict[str, t.Any], 2288 **kwargs: t.Any, 2289 ) -> None: 2290 if is_first_insert: 2291 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2292 else: 2293 columns_to_types, source_columns = self._get_target_and_source_columns( 2294 model, 2295 table_name, 2296 render_kwargs=render_kwargs, 2297 ) 2298 self.adapter.merge( 2299 table_name, 2300 query_or_df, 2301 target_columns_to_types=columns_to_types, 2302 unique_key=model.unique_key, 2303 when_matched=model.when_matched, 2304 merge_filter=model.render_merge_filter( 2305 start=kwargs.get("start"), 2306 end=kwargs.get("end"), 2307 execution_time=kwargs.get("execution_time"), 2308 ), 2309 physical_properties=kwargs.get("physical_properties", model.physical_properties), 2310 source_columns=source_columns, 2311 ) 2312 2313 def append( 2314 self, 2315 table_name: str, 2316 query_or_df: QueryOrDF, 2317 model: Model, 2318 render_kwargs: t.Dict[str, t.Any], 2319 **kwargs: t.Any, 2320 ) -> None: 2321 columns_to_types, source_columns = self._get_target_and_source_columns( 2322 model, table_name, render_kwargs=render_kwargs 2323 ) 2324 self.adapter.merge( 2325 table_name, 2326 query_or_df, 2327 target_columns_to_types=columns_to_types, 2328 unique_key=model.unique_key, 2329 when_matched=model.when_matched, 2330 merge_filter=model.render_merge_filter( 2331 start=kwargs.get("start"), 2332 end=kwargs.get("end"), 2333 execution_time=kwargs.get("execution_time"), 2334 ), 2335 physical_properties=kwargs.get("physical_properties", model.physical_properties), 2336 source_columns=source_columns, 2337 )
Helper class that provides a standard way to create an ABC using inheritance.
2281 def insert( 2282 self, 2283 table_name: str, 2284 query_or_df: QueryOrDF, 2285 model: Model, 2286 is_first_insert: bool, 2287 render_kwargs: t.Dict[str, t.Any], 2288 **kwargs: t.Any, 2289 ) -> None: 2290 if is_first_insert: 2291 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs) 2292 else: 2293 columns_to_types, source_columns = self._get_target_and_source_columns( 2294 model, 2295 table_name, 2296 render_kwargs=render_kwargs, 2297 ) 2298 self.adapter.merge( 2299 table_name, 2300 query_or_df, 2301 target_columns_to_types=columns_to_types, 2302 unique_key=model.unique_key, 2303 when_matched=model.when_matched, 2304 merge_filter=model.render_merge_filter( 2305 start=kwargs.get("start"), 2306 end=kwargs.get("end"), 2307 execution_time=kwargs.get("execution_time"), 2308 ), 2309 physical_properties=kwargs.get("physical_properties", model.physical_properties), 2310 source_columns=source_columns, 2311 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2313 def append( 2314 self, 2315 table_name: str, 2316 query_or_df: QueryOrDF, 2317 model: Model, 2318 render_kwargs: t.Dict[str, t.Any], 2319 **kwargs: t.Any, 2320 ) -> None: 2321 columns_to_types, source_columns = self._get_target_and_source_columns( 2322 model, table_name, render_kwargs=render_kwargs 2323 ) 2324 self.adapter.merge( 2325 table_name, 2326 query_or_df, 2327 target_columns_to_types=columns_to_types, 2328 unique_key=model.unique_key, 2329 when_matched=model.when_matched, 2330 merge_filter=model.render_merge_filter( 2331 start=kwargs.get("start"), 2332 end=kwargs.get("end"), 2333 execution_time=kwargs.get("execution_time"), 2334 ), 2335 physical_properties=kwargs.get("physical_properties", model.physical_properties), 2336 source_columns=source_columns, 2337 )
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2340class IncrementalUnmanagedStrategy(IncrementalStrategy): 2341 def append( 2342 self, 2343 table_name: str, 2344 query_or_df: QueryOrDF, 2345 model: Model, 2346 render_kwargs: t.Dict[str, t.Any], 2347 **kwargs: t.Any, 2348 ) -> None: 2349 columns_to_types, source_columns = self._get_target_and_source_columns( 2350 model, table_name, render_kwargs=render_kwargs 2351 ) 2352 self.adapter.insert_append( 2353 table_name, 2354 query_or_df, 2355 target_columns_to_types=columns_to_types, 2356 source_columns=source_columns, 2357 ) 2358 2359 def insert( 2360 self, 2361 table_name: str, 2362 query_or_df: QueryOrDF, 2363 model: Model, 2364 is_first_insert: bool, 2365 render_kwargs: t.Dict[str, t.Any], 2366 **kwargs: t.Any, 2367 ) -> None: 2368 if is_first_insert: 2369 return self._replace_query_for_model( 2370 model, table_name, query_or_df, render_kwargs, **kwargs 2371 ) 2372 if isinstance(model.kind, IncrementalUnmanagedKind) and model.kind.insert_overwrite: 2373 columns_to_types, source_columns = self._get_target_and_source_columns( 2374 model, 2375 table_name, 2376 render_kwargs=render_kwargs, 2377 ) 2378 2379 return self.adapter.insert_overwrite_by_partition( 2380 table_name, 2381 query_or_df, 2382 model.partitioned_by, 2383 target_columns_to_types=columns_to_types, 2384 source_columns=source_columns, 2385 ) 2386 return self.append( 2387 table_name, 2388 query_or_df, 2389 model, 2390 render_kwargs=render_kwargs, 2391 **kwargs, 2392 )
Helper class that provides a standard way to create an ABC using inheritance.
2341 def append( 2342 self, 2343 table_name: str, 2344 query_or_df: QueryOrDF, 2345 model: Model, 2346 render_kwargs: t.Dict[str, t.Any], 2347 **kwargs: t.Any, 2348 ) -> None: 2349 columns_to_types, source_columns = self._get_target_and_source_columns( 2350 model, table_name, render_kwargs=render_kwargs 2351 ) 2352 self.adapter.insert_append( 2353 table_name, 2354 query_or_df, 2355 target_columns_to_types=columns_to_types, 2356 source_columns=source_columns, 2357 )
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2359 def insert( 2360 self, 2361 table_name: str, 2362 query_or_df: QueryOrDF, 2363 model: Model, 2364 is_first_insert: bool, 2365 render_kwargs: t.Dict[str, t.Any], 2366 **kwargs: t.Any, 2367 ) -> None: 2368 if is_first_insert: 2369 return self._replace_query_for_model( 2370 model, table_name, query_or_df, render_kwargs, **kwargs 2371 ) 2372 if isinstance(model.kind, IncrementalUnmanagedKind) and model.kind.insert_overwrite: 2373 columns_to_types, source_columns = self._get_target_and_source_columns( 2374 model, 2375 table_name, 2376 render_kwargs=render_kwargs, 2377 ) 2378 2379 return self.adapter.insert_overwrite_by_partition( 2380 table_name, 2381 query_or_df, 2382 model.partitioned_by, 2383 target_columns_to_types=columns_to_types, 2384 source_columns=source_columns, 2385 ) 2386 return self.append( 2387 table_name, 2388 query_or_df, 2389 model, 2390 render_kwargs=render_kwargs, 2391 **kwargs, 2392 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2395class FullRefreshStrategy(MaterializableStrategy): 2396 def append( 2397 self, 2398 table_name: str, 2399 query_or_df: QueryOrDF, 2400 model: Model, 2401 render_kwargs: t.Dict[str, t.Any], 2402 **kwargs: t.Any, 2403 ) -> None: 2404 self.adapter.insert_append( 2405 table_name, 2406 query_or_df, 2407 target_columns_to_types=model.columns_to_types, 2408 ) 2409 2410 def insert( 2411 self, 2412 table_name: str, 2413 query_or_df: QueryOrDF, 2414 model: Model, 2415 is_first_insert: bool, 2416 render_kwargs: t.Dict[str, t.Any], 2417 **kwargs: t.Any, 2418 ) -> None: 2419 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs)
Helper class that provides a standard way to create an ABC using inheritance.
2396 def append( 2397 self, 2398 table_name: str, 2399 query_or_df: QueryOrDF, 2400 model: Model, 2401 render_kwargs: t.Dict[str, t.Any], 2402 **kwargs: t.Any, 2403 ) -> None: 2404 self.adapter.insert_append( 2405 table_name, 2406 query_or_df, 2407 target_columns_to_types=model.columns_to_types, 2408 )
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2410 def insert( 2411 self, 2412 table_name: str, 2413 query_or_df: QueryOrDF, 2414 model: Model, 2415 is_first_insert: bool, 2416 render_kwargs: t.Dict[str, t.Any], 2417 **kwargs: t.Any, 2418 ) -> None: 2419 self._replace_query_for_model(model, table_name, query_or_df, render_kwargs, **kwargs)
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2422class SeedStrategy(MaterializableStrategy): 2423 def create( 2424 self, 2425 table_name: str, 2426 model: Model, 2427 is_table_deployable: bool, 2428 render_kwargs: t.Dict[str, t.Any], 2429 skip_grants: bool, 2430 **kwargs: t.Any, 2431 ) -> None: 2432 model = t.cast(SeedModel, model) 2433 if not model.is_hydrated and self.adapter.table_exists(table_name): 2434 # This likely means that the table was created and populated previously, but the evaluation stage 2435 # failed before the interval could be added for this model. 2436 logger.warning( 2437 "Seed model '%s' is not hydrated, but the table '%s' exists. Skipping creation", 2438 model.name, 2439 table_name, 2440 ) 2441 return 2442 2443 super().create( 2444 table_name, 2445 model, 2446 is_table_deployable, 2447 render_kwargs, 2448 skip_grants=True, # Skip grants; they're applied after data insertion 2449 **kwargs, 2450 ) 2451 # For seeds we insert data at the time of table creation. 2452 try: 2453 for index, df in enumerate(model.render_seed()): 2454 if index == 0: 2455 self._replace_query_for_model( 2456 model, 2457 table_name, 2458 df, 2459 render_kwargs, 2460 skip_grants=True, # Skip grants; they're applied after data insertion 2461 **kwargs, 2462 ) 2463 else: 2464 self.adapter.insert_append( 2465 table_name, df, target_columns_to_types=model.columns_to_types 2466 ) 2467 2468 if not skip_grants: 2469 # Apply grants after seed table creation and data insertion 2470 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2471 self._apply_grants( 2472 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2473 ) 2474 except Exception: 2475 self.adapter.drop_table(table_name) 2476 raise 2477 2478 def migrate( 2479 self, 2480 target_table_name: str, 2481 source_table_name: str, 2482 snapshot: Snapshot, 2483 *, 2484 ignore_destructive: bool, 2485 ignore_additive: bool, 2486 **kwargs: t.Any, 2487 ) -> None: 2488 raise NotImplementedError("Seeds do not support migrations.") 2489 2490 def insert( 2491 self, 2492 table_name: str, 2493 query_or_df: QueryOrDF, 2494 model: Model, 2495 is_first_insert: bool, 2496 render_kwargs: t.Dict[str, t.Any], 2497 **kwargs: t.Any, 2498 ) -> None: 2499 # Data has already been inserted at the time of table creation. 2500 pass 2501 2502 def append( 2503 self, 2504 table_name: str, 2505 query_or_df: QueryOrDF, 2506 model: Model, 2507 render_kwargs: t.Dict[str, t.Any], 2508 **kwargs: t.Any, 2509 ) -> None: 2510 # Data has already been inserted at the time of table creation. 2511 pass
Helper class that provides a standard way to create an ABC using inheritance.
2423 def create( 2424 self, 2425 table_name: str, 2426 model: Model, 2427 is_table_deployable: bool, 2428 render_kwargs: t.Dict[str, t.Any], 2429 skip_grants: bool, 2430 **kwargs: t.Any, 2431 ) -> None: 2432 model = t.cast(SeedModel, model) 2433 if not model.is_hydrated and self.adapter.table_exists(table_name): 2434 # This likely means that the table was created and populated previously, but the evaluation stage 2435 # failed before the interval could be added for this model. 2436 logger.warning( 2437 "Seed model '%s' is not hydrated, but the table '%s' exists. Skipping creation", 2438 model.name, 2439 table_name, 2440 ) 2441 return 2442 2443 super().create( 2444 table_name, 2445 model, 2446 is_table_deployable, 2447 render_kwargs, 2448 skip_grants=True, # Skip grants; they're applied after data insertion 2449 **kwargs, 2450 ) 2451 # For seeds we insert data at the time of table creation. 2452 try: 2453 for index, df in enumerate(model.render_seed()): 2454 if index == 0: 2455 self._replace_query_for_model( 2456 model, 2457 table_name, 2458 df, 2459 render_kwargs, 2460 skip_grants=True, # Skip grants; they're applied after data insertion 2461 **kwargs, 2462 ) 2463 else: 2464 self.adapter.insert_append( 2465 table_name, df, target_columns_to_types=model.columns_to_types 2466 ) 2467 2468 if not skip_grants: 2469 # Apply grants after seed table creation and data insertion 2470 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2471 self._apply_grants( 2472 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2473 ) 2474 except Exception: 2475 self.adapter.drop_table(table_name) 2476 raise
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2478 def migrate( 2479 self, 2480 target_table_name: str, 2481 source_table_name: str, 2482 snapshot: Snapshot, 2483 *, 2484 ignore_destructive: bool, 2485 ignore_additive: bool, 2486 **kwargs: t.Any, 2487 ) -> None: 2488 raise NotImplementedError("Seeds do not support migrations.")
Migrates the target table schema so that it corresponds to the source table schema.
Arguments:
- target_table_name: The target table name.
- source_table_name: The source table name.
- snapshot: The target snapshot.
- ignore_destructive: If True, destructive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
- ignore_additive: If True, additive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
2490 def insert( 2491 self, 2492 table_name: str, 2493 query_or_df: QueryOrDF, 2494 model: Model, 2495 is_first_insert: bool, 2496 render_kwargs: t.Dict[str, t.Any], 2497 **kwargs: t.Any, 2498 ) -> None: 2499 # Data has already been inserted at the time of table creation. 2500 pass
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2502 def append( 2503 self, 2504 table_name: str, 2505 query_or_df: QueryOrDF, 2506 model: Model, 2507 render_kwargs: t.Dict[str, t.Any], 2508 **kwargs: t.Any, 2509 ) -> None: 2510 # Data has already been inserted at the time of table creation. 2511 pass
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2514class SCDType2Strategy(IncrementalStrategy): 2515 def create( 2516 self, 2517 table_name: str, 2518 model: Model, 2519 is_table_deployable: bool, 2520 render_kwargs: t.Dict[str, t.Any], 2521 skip_grants: bool, 2522 **kwargs: t.Any, 2523 ) -> None: 2524 assert isinstance(model.kind, (SCDType2ByTimeKind, SCDType2ByColumnKind)) 2525 if model.annotated: 2526 logger.info("Creating table '%s'", table_name) 2527 columns_to_types = model.columns_to_types_or_raise 2528 if isinstance(model.kind, SCDType2ByTimeKind): 2529 columns_to_types[model.kind.updated_at_name.name] = model.kind.time_data_type 2530 self.adapter.create_table( 2531 table_name, 2532 target_columns_to_types=columns_to_types, 2533 table_format=model.table_format, 2534 storage_format=model.storage_format, 2535 partitioned_by=model.partitioned_by, 2536 partition_interval_unit=model.partition_interval_unit, 2537 clustered_by=model.clustered_by, 2538 table_properties=kwargs.get("physical_properties", model.physical_properties), 2539 table_description=model.description if is_table_deployable else None, 2540 column_descriptions=model.column_descriptions if is_table_deployable else None, 2541 ) 2542 else: 2543 # We assume that the data type for `updated_at_name` matches the data type that is defined for 2544 # `time_data_type`. If that isn't the case, then the user might get an error about not being able 2545 # to do comparisons across different data types 2546 super().create( 2547 table_name, 2548 model, 2549 is_table_deployable, 2550 render_kwargs, 2551 skip_grants, 2552 **kwargs, 2553 ) 2554 2555 if not skip_grants: 2556 # Apply grants after SCD Type 2 table creation 2557 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2558 self._apply_grants( 2559 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2560 ) 2561 2562 def insert( 2563 self, 2564 table_name: str, 2565 query_or_df: QueryOrDF, 2566 model: Model, 2567 is_first_insert: bool, 2568 render_kwargs: t.Dict[str, t.Any], 2569 **kwargs: t.Any, 2570 ) -> None: 2571 # Source columns from the underlying table to prevent unintentional table schema changes during restatement of incremental models. 2572 columns_to_types, source_columns = self._get_target_and_source_columns( 2573 model, 2574 table_name, 2575 render_kwargs=render_kwargs, 2576 force_get_columns_from_target=True, 2577 ) 2578 if isinstance(model.kind, SCDType2ByTimeKind): 2579 self.adapter.scd_type_2_by_time( 2580 target_table=table_name, 2581 source_table=query_or_df, 2582 unique_key=model.unique_key, 2583 valid_from_col=model.kind.valid_from_name, 2584 valid_to_col=model.kind.valid_to_name, 2585 execution_time=kwargs["execution_time"], 2586 updated_at_col=model.kind.updated_at_name, 2587 invalidate_hard_deletes=model.kind.invalidate_hard_deletes, 2588 updated_at_as_valid_from=model.kind.updated_at_as_valid_from, 2589 target_columns_to_types=columns_to_types, 2590 table_format=model.table_format, 2591 table_description=model.description, 2592 column_descriptions=model.column_descriptions, 2593 truncate=is_first_insert, 2594 source_columns=source_columns, 2595 storage_format=model.storage_format, 2596 partitioned_by=model.partitioned_by, 2597 partition_interval_unit=model.partition_interval_unit, 2598 clustered_by=model.clustered_by, 2599 table_properties=kwargs.get("physical_properties", model.physical_properties), 2600 ) 2601 elif isinstance(model.kind, SCDType2ByColumnKind): 2602 self.adapter.scd_type_2_by_column( 2603 target_table=table_name, 2604 source_table=query_or_df, 2605 unique_key=model.unique_key, 2606 valid_from_col=model.kind.valid_from_name, 2607 valid_to_col=model.kind.valid_to_name, 2608 execution_time=model.kind.updated_at_name or kwargs["execution_time"], 2609 check_columns=model.kind.columns, 2610 invalidate_hard_deletes=model.kind.invalidate_hard_deletes, 2611 execution_time_as_valid_from=model.kind.execution_time_as_valid_from, 2612 target_columns_to_types=columns_to_types, 2613 table_format=model.table_format, 2614 table_description=model.description, 2615 column_descriptions=model.column_descriptions, 2616 truncate=is_first_insert, 2617 source_columns=source_columns, 2618 storage_format=model.storage_format, 2619 partitioned_by=model.partitioned_by, 2620 partition_interval_unit=model.partition_interval_unit, 2621 clustered_by=model.clustered_by, 2622 table_properties=kwargs.get("physical_properties", model.physical_properties), 2623 ) 2624 else: 2625 raise SQLMeshError( 2626 f"Unexpected SCD Type 2 kind: {model.kind}. This is not expected and please report this as a bug." 2627 ) 2628 2629 # Apply grants after SCD Type 2 table recreation 2630 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2631 self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) 2632 2633 def append( 2634 self, 2635 table_name: str, 2636 query_or_df: QueryOrDF, 2637 model: Model, 2638 render_kwargs: t.Dict[str, t.Any], 2639 **kwargs: t.Any, 2640 ) -> None: 2641 return self.insert( 2642 table_name, 2643 query_or_df, 2644 model, 2645 is_first_insert=False, 2646 render_kwargs=render_kwargs, 2647 **kwargs, 2648 )
Helper class that provides a standard way to create an ABC using inheritance.
2515 def create( 2516 self, 2517 table_name: str, 2518 model: Model, 2519 is_table_deployable: bool, 2520 render_kwargs: t.Dict[str, t.Any], 2521 skip_grants: bool, 2522 **kwargs: t.Any, 2523 ) -> None: 2524 assert isinstance(model.kind, (SCDType2ByTimeKind, SCDType2ByColumnKind)) 2525 if model.annotated: 2526 logger.info("Creating table '%s'", table_name) 2527 columns_to_types = model.columns_to_types_or_raise 2528 if isinstance(model.kind, SCDType2ByTimeKind): 2529 columns_to_types[model.kind.updated_at_name.name] = model.kind.time_data_type 2530 self.adapter.create_table( 2531 table_name, 2532 target_columns_to_types=columns_to_types, 2533 table_format=model.table_format, 2534 storage_format=model.storage_format, 2535 partitioned_by=model.partitioned_by, 2536 partition_interval_unit=model.partition_interval_unit, 2537 clustered_by=model.clustered_by, 2538 table_properties=kwargs.get("physical_properties", model.physical_properties), 2539 table_description=model.description if is_table_deployable else None, 2540 column_descriptions=model.column_descriptions if is_table_deployable else None, 2541 ) 2542 else: 2543 # We assume that the data type for `updated_at_name` matches the data type that is defined for 2544 # `time_data_type`. If that isn't the case, then the user might get an error about not being able 2545 # to do comparisons across different data types 2546 super().create( 2547 table_name, 2548 model, 2549 is_table_deployable, 2550 render_kwargs, 2551 skip_grants, 2552 **kwargs, 2553 ) 2554 2555 if not skip_grants: 2556 # Apply grants after SCD Type 2 table creation 2557 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2558 self._apply_grants( 2559 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2560 )
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2562 def insert( 2563 self, 2564 table_name: str, 2565 query_or_df: QueryOrDF, 2566 model: Model, 2567 is_first_insert: bool, 2568 render_kwargs: t.Dict[str, t.Any], 2569 **kwargs: t.Any, 2570 ) -> None: 2571 # Source columns from the underlying table to prevent unintentional table schema changes during restatement of incremental models. 2572 columns_to_types, source_columns = self._get_target_and_source_columns( 2573 model, 2574 table_name, 2575 render_kwargs=render_kwargs, 2576 force_get_columns_from_target=True, 2577 ) 2578 if isinstance(model.kind, SCDType2ByTimeKind): 2579 self.adapter.scd_type_2_by_time( 2580 target_table=table_name, 2581 source_table=query_or_df, 2582 unique_key=model.unique_key, 2583 valid_from_col=model.kind.valid_from_name, 2584 valid_to_col=model.kind.valid_to_name, 2585 execution_time=kwargs["execution_time"], 2586 updated_at_col=model.kind.updated_at_name, 2587 invalidate_hard_deletes=model.kind.invalidate_hard_deletes, 2588 updated_at_as_valid_from=model.kind.updated_at_as_valid_from, 2589 target_columns_to_types=columns_to_types, 2590 table_format=model.table_format, 2591 table_description=model.description, 2592 column_descriptions=model.column_descriptions, 2593 truncate=is_first_insert, 2594 source_columns=source_columns, 2595 storage_format=model.storage_format, 2596 partitioned_by=model.partitioned_by, 2597 partition_interval_unit=model.partition_interval_unit, 2598 clustered_by=model.clustered_by, 2599 table_properties=kwargs.get("physical_properties", model.physical_properties), 2600 ) 2601 elif isinstance(model.kind, SCDType2ByColumnKind): 2602 self.adapter.scd_type_2_by_column( 2603 target_table=table_name, 2604 source_table=query_or_df, 2605 unique_key=model.unique_key, 2606 valid_from_col=model.kind.valid_from_name, 2607 valid_to_col=model.kind.valid_to_name, 2608 execution_time=model.kind.updated_at_name or kwargs["execution_time"], 2609 check_columns=model.kind.columns, 2610 invalidate_hard_deletes=model.kind.invalidate_hard_deletes, 2611 execution_time_as_valid_from=model.kind.execution_time_as_valid_from, 2612 target_columns_to_types=columns_to_types, 2613 table_format=model.table_format, 2614 table_description=model.description, 2615 column_descriptions=model.column_descriptions, 2616 truncate=is_first_insert, 2617 source_columns=source_columns, 2618 storage_format=model.storage_format, 2619 partitioned_by=model.partitioned_by, 2620 partition_interval_unit=model.partition_interval_unit, 2621 clustered_by=model.clustered_by, 2622 table_properties=kwargs.get("physical_properties", model.physical_properties), 2623 ) 2624 else: 2625 raise SQLMeshError( 2626 f"Unexpected SCD Type 2 kind: {model.kind}. This is not expected and please report this as a bug." 2627 ) 2628 2629 # Apply grants after SCD Type 2 table recreation 2630 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2631 self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable)
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2633 def append( 2634 self, 2635 table_name: str, 2636 query_or_df: QueryOrDF, 2637 model: Model, 2638 render_kwargs: t.Dict[str, t.Any], 2639 **kwargs: t.Any, 2640 ) -> None: 2641 return self.insert( 2642 table_name, 2643 query_or_df, 2644 model, 2645 is_first_insert=False, 2646 render_kwargs=render_kwargs, 2647 **kwargs, 2648 )
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2651class ViewStrategy(PromotableStrategy): 2652 def insert( 2653 self, 2654 table_name: str, 2655 query_or_df: QueryOrDF, 2656 model: Model, 2657 is_first_insert: bool, 2658 render_kwargs: t.Dict[str, t.Any], 2659 **kwargs: t.Any, 2660 ) -> None: 2661 # We should recreate MVs across supported engines (Snowflake, BigQuery etc) because 2662 # if upstream tables were recreated (e.g FULL models), the MVs would be silently invalidated. 2663 # The only exception to that rule is RisingWave which doesn't support CREATE OR REPLACE, so upstream 2664 # models don't recreate their physical tables for the MVs to be invalidated. 2665 # However, even for RW we still want to recreate MVs to avoid stale references, as is the case with normal views. 2666 # The flag is_first_insert is used for that matter as a signal to recreate the MV if the snapshot's intervals 2667 # have been cleared by `should_force_rebuild` 2668 is_materialized_view = self._is_materialized_view(model) 2669 must_recreate_view = not self.adapter.HAS_VIEW_BINDING or ( 2670 is_materialized_view and is_first_insert 2671 ) 2672 2673 if self.adapter.table_exists(table_name) and not must_recreate_view: 2674 logger.info("Skipping creation of the view '%s'", table_name) 2675 return 2676 2677 logger.info("Replacing view '%s'", table_name) 2678 self.adapter.create_view( 2679 table_name, 2680 query_or_df, 2681 model.columns_to_types, 2682 replace=must_recreate_view, 2683 materialized=is_materialized_view, 2684 view_properties=kwargs.get("physical_properties", model.physical_properties), 2685 table_description=model.description, 2686 column_descriptions=model.column_descriptions, 2687 ) 2688 2689 # Apply grants after view creation / replacement 2690 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2691 self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable) 2692 2693 def append( 2694 self, 2695 table_name: str, 2696 query_or_df: QueryOrDF, 2697 model: Model, 2698 render_kwargs: t.Dict[str, t.Any], 2699 **kwargs: t.Any, 2700 ) -> None: 2701 raise ConfigError(f"Cannot append to a view '{table_name}'.") 2702 2703 def create( 2704 self, 2705 table_name: str, 2706 model: Model, 2707 is_table_deployable: bool, 2708 render_kwargs: t.Dict[str, t.Any], 2709 skip_grants: bool, 2710 **kwargs: t.Any, 2711 ) -> None: 2712 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2713 2714 if self.adapter.table_exists(table_name): 2715 # Make sure we don't recreate the view to prevent deletion of downstream views in engines with no late 2716 # binding support (because of DROP CASCADE). 2717 logger.info("View '%s' already exists", table_name) 2718 2719 if not skip_grants: 2720 # Always apply grants when present, even if view exists, to handle grants updates 2721 self._apply_grants( 2722 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2723 ) 2724 return 2725 2726 logger.info("Creating view '%s'", table_name) 2727 materialized = self._is_materialized_view(model) 2728 materialized_properties = None 2729 if materialized: 2730 materialized_properties = { 2731 "partitioned_by": model.partitioned_by, 2732 "clustered_by": model.clustered_by, 2733 "partition_interval_unit": model.partition_interval_unit, 2734 } 2735 self.adapter.create_view( 2736 table_name, 2737 model.render_query_or_raise(**render_kwargs), 2738 # Make sure we never replace the view during creation to avoid race conditions in engines with no late binding support. 2739 replace=False, 2740 materialized=self._is_materialized_view(model), 2741 materialized_properties=materialized_properties, 2742 view_properties=kwargs.get("physical_properties", model.physical_properties), 2743 table_description=model.description if is_table_deployable else None, 2744 column_descriptions=model.column_descriptions if is_table_deployable else None, 2745 ) 2746 2747 if not skip_grants: 2748 # Apply grants after view creation 2749 self._apply_grants( 2750 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2751 ) 2752 2753 def migrate( 2754 self, 2755 target_table_name: str, 2756 source_table_name: str, 2757 snapshot: Snapshot, 2758 *, 2759 ignore_destructive: bool, 2760 ignore_additive: bool, 2761 **kwargs: t.Any, 2762 ) -> None: 2763 logger.info("Migrating view '%s'", target_table_name) 2764 model = snapshot.model 2765 render_kwargs = dict( 2766 execution_time=now(), snapshots=kwargs["snapshots"], engine_adapter=self.adapter 2767 ) 2768 2769 self.adapter.create_view( 2770 target_table_name, 2771 model.render_query_or_raise(**render_kwargs), 2772 model.columns_to_types, 2773 materialized=self._is_materialized_view(model), 2774 view_properties=model.render_physical_properties(**render_kwargs), 2775 table_description=model.description, 2776 column_descriptions=model.column_descriptions, 2777 ) 2778 2779 # Apply grants after view migration 2780 deployability_index = kwargs.get("deployability_index") 2781 is_snapshot_deployable = ( 2782 deployability_index.is_deployable(snapshot) if deployability_index else False 2783 ) 2784 self._apply_grants( 2785 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2786 ) 2787 2788 def delete(self, name: str, **kwargs: t.Any) -> None: 2789 cascade = kwargs.pop("cascade", False) 2790 try: 2791 # Some engines (e.g., RisingWave) don’t fail when dropping a materialized view with a DROP VIEW statement, 2792 # because views and materialized views don’t share the same namespace. Therefore, we should not ignore if the 2793 # view doesn't exist and let the exception handler attempt to drop the materialized view. 2794 self.adapter.drop_view(name, cascade=cascade, ignore_if_not_exists=False) 2795 except Exception: 2796 logger.debug( 2797 "Failed to drop view '%s'. Trying to drop the materialized view instead", 2798 name, 2799 exc_info=True, 2800 ) 2801 self.adapter.drop_view( 2802 name, materialized=True, cascade=cascade, ignore_if_not_exists=True 2803 ) 2804 logger.info("Dropped view '%s'", name) 2805 2806 def _is_materialized_view(self, model: Model) -> bool: 2807 return isinstance(model.kind, ViewKind) and model.kind.materialized
Helper class that provides a standard way to create an ABC using inheritance.
2652 def insert( 2653 self, 2654 table_name: str, 2655 query_or_df: QueryOrDF, 2656 model: Model, 2657 is_first_insert: bool, 2658 render_kwargs: t.Dict[str, t.Any], 2659 **kwargs: t.Any, 2660 ) -> None: 2661 # We should recreate MVs across supported engines (Snowflake, BigQuery etc) because 2662 # if upstream tables were recreated (e.g FULL models), the MVs would be silently invalidated. 2663 # The only exception to that rule is RisingWave which doesn't support CREATE OR REPLACE, so upstream 2664 # models don't recreate their physical tables for the MVs to be invalidated. 2665 # However, even for RW we still want to recreate MVs to avoid stale references, as is the case with normal views. 2666 # The flag is_first_insert is used for that matter as a signal to recreate the MV if the snapshot's intervals 2667 # have been cleared by `should_force_rebuild` 2668 is_materialized_view = self._is_materialized_view(model) 2669 must_recreate_view = not self.adapter.HAS_VIEW_BINDING or ( 2670 is_materialized_view and is_first_insert 2671 ) 2672 2673 if self.adapter.table_exists(table_name) and not must_recreate_view: 2674 logger.info("Skipping creation of the view '%s'", table_name) 2675 return 2676 2677 logger.info("Replacing view '%s'", table_name) 2678 self.adapter.create_view( 2679 table_name, 2680 query_or_df, 2681 model.columns_to_types, 2682 replace=must_recreate_view, 2683 materialized=is_materialized_view, 2684 view_properties=kwargs.get("physical_properties", model.physical_properties), 2685 table_description=model.description, 2686 column_descriptions=model.column_descriptions, 2687 ) 2688 2689 # Apply grants after view creation / replacement 2690 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2691 self._apply_grants(model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable)
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2693 def append( 2694 self, 2695 table_name: str, 2696 query_or_df: QueryOrDF, 2697 model: Model, 2698 render_kwargs: t.Dict[str, t.Any], 2699 **kwargs: t.Any, 2700 ) -> None: 2701 raise ConfigError(f"Cannot append to a view '{table_name}'.")
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2703 def create( 2704 self, 2705 table_name: str, 2706 model: Model, 2707 is_table_deployable: bool, 2708 render_kwargs: t.Dict[str, t.Any], 2709 skip_grants: bool, 2710 **kwargs: t.Any, 2711 ) -> None: 2712 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2713 2714 if self.adapter.table_exists(table_name): 2715 # Make sure we don't recreate the view to prevent deletion of downstream views in engines with no late 2716 # binding support (because of DROP CASCADE). 2717 logger.info("View '%s' already exists", table_name) 2718 2719 if not skip_grants: 2720 # Always apply grants when present, even if view exists, to handle grants updates 2721 self._apply_grants( 2722 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2723 ) 2724 return 2725 2726 logger.info("Creating view '%s'", table_name) 2727 materialized = self._is_materialized_view(model) 2728 materialized_properties = None 2729 if materialized: 2730 materialized_properties = { 2731 "partitioned_by": model.partitioned_by, 2732 "clustered_by": model.clustered_by, 2733 "partition_interval_unit": model.partition_interval_unit, 2734 } 2735 self.adapter.create_view( 2736 table_name, 2737 model.render_query_or_raise(**render_kwargs), 2738 # Make sure we never replace the view during creation to avoid race conditions in engines with no late binding support. 2739 replace=False, 2740 materialized=self._is_materialized_view(model), 2741 materialized_properties=materialized_properties, 2742 view_properties=kwargs.get("physical_properties", model.physical_properties), 2743 table_description=model.description if is_table_deployable else None, 2744 column_descriptions=model.column_descriptions if is_table_deployable else None, 2745 ) 2746 2747 if not skip_grants: 2748 # Apply grants after view creation 2749 self._apply_grants( 2750 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2751 )
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2753 def migrate( 2754 self, 2755 target_table_name: str, 2756 source_table_name: str, 2757 snapshot: Snapshot, 2758 *, 2759 ignore_destructive: bool, 2760 ignore_additive: bool, 2761 **kwargs: t.Any, 2762 ) -> None: 2763 logger.info("Migrating view '%s'", target_table_name) 2764 model = snapshot.model 2765 render_kwargs = dict( 2766 execution_time=now(), snapshots=kwargs["snapshots"], engine_adapter=self.adapter 2767 ) 2768 2769 self.adapter.create_view( 2770 target_table_name, 2771 model.render_query_or_raise(**render_kwargs), 2772 model.columns_to_types, 2773 materialized=self._is_materialized_view(model), 2774 view_properties=model.render_physical_properties(**render_kwargs), 2775 table_description=model.description, 2776 column_descriptions=model.column_descriptions, 2777 ) 2778 2779 # Apply grants after view migration 2780 deployability_index = kwargs.get("deployability_index") 2781 is_snapshot_deployable = ( 2782 deployability_index.is_deployable(snapshot) if deployability_index else False 2783 ) 2784 self._apply_grants( 2785 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2786 )
Migrates the target table schema so that it corresponds to the source table schema.
Arguments:
- target_table_name: The target table name.
- source_table_name: The source table name.
- snapshot: The target snapshot.
- ignore_destructive: If True, destructive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
- ignore_additive: If True, additive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
2788 def delete(self, name: str, **kwargs: t.Any) -> None: 2789 cascade = kwargs.pop("cascade", False) 2790 try: 2791 # Some engines (e.g., RisingWave) don’t fail when dropping a materialized view with a DROP VIEW statement, 2792 # because views and materialized views don’t share the same namespace. Therefore, we should not ignore if the 2793 # view doesn't exist and let the exception handler attempt to drop the materialized view. 2794 self.adapter.drop_view(name, cascade=cascade, ignore_if_not_exists=False) 2795 except Exception: 2796 logger.debug( 2797 "Failed to drop view '%s'. Trying to drop the materialized view instead", 2798 name, 2799 exc_info=True, 2800 ) 2801 self.adapter.drop_view( 2802 name, materialized=True, cascade=cascade, ignore_if_not_exists=True 2803 ) 2804 logger.info("Dropped view '%s'", name)
Deletes a target table or a view.
Arguments:
- name: The name of a table or a view.
2813class CustomMaterialization(IncrementalStrategy, t.Generic[C]): 2814 """Base class for custom materializations.""" 2815 2816 def insert( 2817 self, 2818 table_name: str, 2819 query_or_df: QueryOrDF, 2820 model: Model, 2821 is_first_insert: bool, 2822 render_kwargs: t.Dict[str, t.Any], 2823 **kwargs: t.Any, 2824 ) -> None: 2825 """Inserts the given query or a DataFrame into the target table or a view. 2826 2827 Args: 2828 table_name: The name of the target table or view. 2829 query_or_df: A query or a DataFrame to insert. 2830 model: The target model. 2831 is_first_insert: Whether this is the first insert for this version of a model. This value is set to True 2832 if no data has been previously inserted into the target table, or when the entire history of the target model has 2833 been restated. Note that in the latter case, the table might contain data from previous executions, and it is the 2834 responsibility of a specific evaluation strategy to handle the truncation of the table if necessary. 2835 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 2836 """ 2837 raise NotImplementedError( 2838 "Custom materialization strategies must implement the 'insert' method." 2839 )
Base class for custom materializations.
2816 def insert( 2817 self, 2818 table_name: str, 2819 query_or_df: QueryOrDF, 2820 model: Model, 2821 is_first_insert: bool, 2822 render_kwargs: t.Dict[str, t.Any], 2823 **kwargs: t.Any, 2824 ) -> None: 2825 """Inserts the given query or a DataFrame into the target table or a view. 2826 2827 Args: 2828 table_name: The name of the target table or view. 2829 query_or_df: A query or a DataFrame to insert. 2830 model: The target model. 2831 is_first_insert: Whether this is the first insert for this version of a model. This value is set to True 2832 if no data has been previously inserted into the target table, or when the entire history of the target model has 2833 been restated. Note that in the latter case, the table might contain data from previous executions, and it is the 2834 responsibility of a specific evaluation strategy to handle the truncation of the table if necessary. 2835 render_kwargs: Additional key-value arguments to pass when rendering the model's query. 2836 """ 2837 raise NotImplementedError( 2838 "Custom materialization strategies must implement the 'insert' method." 2839 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2847def get_custom_materialization_kind_type(st: t.Type[CustomMaterialization]) -> t.Type[CustomKind]: 2848 # try to read if there is a custom 'kind' type in use by inspecting the type signature 2849 # eg try to read 'MyCustomKind' from: 2850 # >>>> class MyCustomMaterialization(CustomMaterialization[MyCustomKind]) 2851 # and fall back to base CustomKind if there is no generic type declared 2852 if hasattr(st, "__orig_bases__"): 2853 for base in st.__orig_bases__: 2854 if hasattr(base, "__origin__") and base.__origin__ == CustomMaterialization: 2855 for generic_arg in t.get_args(base): 2856 if not issubclass(generic_arg, CustomKind): 2857 raise SQLMeshError( 2858 f"Custom materialization kind '{generic_arg.__name__}' must be a subclass of CustomKind" 2859 ) 2860 2861 return generic_arg 2862 2863 return CustomKind
2866def get_custom_materialization_type( 2867 name: str, raise_errors: bool = True 2868) -> t.Optional[t.Tuple[t.Type[CustomKind], t.Type[CustomMaterialization]]]: 2869 global _custom_materialization_type_cache 2870 2871 strategy_key = name.lower() 2872 2873 try: 2874 if ( 2875 _custom_materialization_type_cache is None 2876 or strategy_key not in _custom_materialization_type_cache 2877 ): 2878 strategy_types = list(CustomMaterialization.__subclasses__()) 2879 2880 entry_points = metadata.entry_points(group="sqlmesh.materializations") 2881 for entry_point in entry_points: 2882 strategy_type = entry_point.load() 2883 if not issubclass(strategy_type, CustomMaterialization): 2884 raise SQLMeshError( 2885 f"Custom materialization entry point '{entry_point.name}' must be a subclass of CustomMaterialization." 2886 ) 2887 strategy_types.append(strategy_type) 2888 2889 _custom_materialization_type_cache = { 2890 getattr(strategy_type, "NAME", strategy_type.__name__).lower(): ( 2891 get_custom_materialization_kind_type(strategy_type), 2892 strategy_type, 2893 ) 2894 for strategy_type in strategy_types 2895 } 2896 2897 if strategy_key not in _custom_materialization_type_cache: 2898 raise ConfigError(f"Materialization strategy with name '{name}' was not found.") 2899 except (SQLMeshError, ConfigError) as e: 2900 if raise_errors: 2901 raise e 2902 2903 from sqlmesh.core.console import get_console 2904 2905 get_console().log_warning(str(e)) 2906 return None 2907 2908 strategy_kind_type, strategy_type = _custom_materialization_type_cache[strategy_key] 2909 logger.debug( 2910 "Resolved custom materialization '%s' to '%s' (%s)", name, strategy_type, strategy_kind_type 2911 ) 2912 2913 return strategy_kind_type, strategy_type
2916def get_custom_materialization_type_or_raise( 2917 name: str, 2918) -> t.Tuple[t.Type[CustomKind], t.Type[CustomMaterialization]]: 2919 types = get_custom_materialization_type(name, raise_errors=True) 2920 if types is not None: 2921 return types[0], types[1] 2922 2923 # Shouldnt get here as get_custom_materialization_type() has raise_errors=True, but just in case... 2924 raise SQLMeshError(f"Custom materialization '{name}' not present in the Python environment")
2927class DbtCustomMaterializationStrategy(MaterializableStrategy): 2928 def __init__( 2929 self, 2930 adapter: EngineAdapter, 2931 materialization_name: str, 2932 materialization_template: str, 2933 ): 2934 super().__init__(adapter) 2935 self.materialization_name = materialization_name 2936 self.materialization_template = materialization_template 2937 2938 def create( 2939 self, 2940 table_name: str, 2941 model: Model, 2942 is_table_deployable: bool, 2943 render_kwargs: t.Dict[str, t.Any], 2944 skip_grants: bool, 2945 **kwargs: t.Any, 2946 ) -> None: 2947 original_query = model.render_query_or_raise(**render_kwargs) 2948 self._execute_materialization( 2949 table_name=table_name, 2950 query_or_df=original_query.limit(0), 2951 model=model, 2952 is_first_insert=True, 2953 render_kwargs=render_kwargs, 2954 create_only=True, 2955 **kwargs, 2956 ) 2957 2958 # Apply grants after dbt custom materialization table creation 2959 if not skip_grants: 2960 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2961 self._apply_grants( 2962 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2963 ) 2964 2965 def insert( 2966 self, 2967 table_name: str, 2968 query_or_df: QueryOrDF, 2969 model: Model, 2970 is_first_insert: bool, 2971 render_kwargs: t.Dict[str, t.Any], 2972 **kwargs: t.Any, 2973 ) -> None: 2974 self._execute_materialization( 2975 table_name=table_name, 2976 query_or_df=query_or_df, 2977 model=model, 2978 is_first_insert=is_first_insert, 2979 render_kwargs=render_kwargs, 2980 **kwargs, 2981 ) 2982 2983 # Apply grants after custom materialization insert (only on first insert) 2984 if is_first_insert: 2985 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2986 self._apply_grants( 2987 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2988 ) 2989 2990 def append( 2991 self, 2992 table_name: str, 2993 query_or_df: QueryOrDF, 2994 model: Model, 2995 render_kwargs: t.Dict[str, t.Any], 2996 **kwargs: t.Any, 2997 ) -> None: 2998 return self.insert( 2999 table_name, 3000 query_or_df, 3001 model, 3002 is_first_insert=False, 3003 render_kwargs=render_kwargs, 3004 **kwargs, 3005 ) 3006 3007 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 3008 # in dbt custom materialisations it's up to the user to run the pre hooks inside the transaction 3009 if not render_kwargs.get("inside_transaction", True): 3010 super().run_pre_statements( 3011 snapshot=snapshot, 3012 render_kwargs=render_kwargs, 3013 ) 3014 3015 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 3016 # in dbt custom materialisations it's up to the user to run the post hooks inside the transaction 3017 if not render_kwargs.get("inside_transaction", True): 3018 super().run_post_statements( 3019 snapshot=snapshot, 3020 render_kwargs=render_kwargs, 3021 ) 3022 3023 def _execute_materialization( 3024 self, 3025 table_name: str, 3026 query_or_df: QueryOrDF, 3027 model: Model, 3028 is_first_insert: bool, 3029 render_kwargs: t.Dict[str, t.Any], 3030 create_only: bool = False, 3031 **kwargs: t.Any, 3032 ) -> None: 3033 jinja_macros = model.jinja_macros 3034 3035 # For vdes we need to use the table, since we don't know the schema/table at parse time 3036 parts = exp.to_table(table_name, dialect=self.adapter.dialect) 3037 3038 existing_globals = jinja_macros.global_objs 3039 relation_info = existing_globals.get("this") 3040 if isinstance(relation_info, dict): 3041 relation_info["database"] = parts.catalog 3042 relation_info["identifier"] = parts.name 3043 relation_info["name"] = parts.name 3044 3045 jinja_globals = { 3046 **existing_globals, 3047 "this": relation_info, 3048 "database": parts.catalog, 3049 "schema": parts.db, 3050 "identifier": parts.name, 3051 "target": existing_globals.get("target", {"type": self.adapter.dialect}), 3052 "execution_dt": kwargs.get("execution_time"), 3053 "engine_adapter": self.adapter, 3054 "sql": str(query_or_df), 3055 "is_first_insert": is_first_insert, 3056 "create_only": create_only, 3057 "pre_hooks": [ 3058 AttributeDict({"sql": s.this.this, "transaction": transaction}) 3059 for s in model.pre_statements 3060 if (transaction := s.args.get("transaction", True)) 3061 ], 3062 "post_hooks": [ 3063 AttributeDict({"sql": s.this.this, "transaction": transaction}) 3064 for s in model.post_statements 3065 if (transaction := s.args.get("transaction", True)) 3066 ], 3067 "model_instance": model, 3068 **kwargs, 3069 } 3070 3071 try: 3072 jinja_env = jinja_macros.build_environment(**jinja_globals) 3073 template = jinja_env.from_string(self.materialization_template) 3074 3075 try: 3076 template.render() 3077 except MacroReturnVal as ret: 3078 # this is a successful return from a macro call (dbt uses this list of Relations to update their relation cache) 3079 returned_relations = ret.value.get("relations", []) 3080 logger.info( 3081 f"Materialization {self.materialization_name} returned relations: {returned_relations}" 3082 ) 3083 3084 except Exception as e: 3085 raise SQLMeshError( 3086 f"Failed to execute dbt materialization '{self.materialization_name}': {e}" 3087 ) from e
Helper class that provides a standard way to create an ABC using inheritance.
2938 def create( 2939 self, 2940 table_name: str, 2941 model: Model, 2942 is_table_deployable: bool, 2943 render_kwargs: t.Dict[str, t.Any], 2944 skip_grants: bool, 2945 **kwargs: t.Any, 2946 ) -> None: 2947 original_query = model.render_query_or_raise(**render_kwargs) 2948 self._execute_materialization( 2949 table_name=table_name, 2950 query_or_df=original_query.limit(0), 2951 model=model, 2952 is_first_insert=True, 2953 render_kwargs=render_kwargs, 2954 create_only=True, 2955 **kwargs, 2956 ) 2957 2958 # Apply grants after dbt custom materialization table creation 2959 if not skip_grants: 2960 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2961 self._apply_grants( 2962 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2963 )
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2965 def insert( 2966 self, 2967 table_name: str, 2968 query_or_df: QueryOrDF, 2969 model: Model, 2970 is_first_insert: bool, 2971 render_kwargs: t.Dict[str, t.Any], 2972 **kwargs: t.Any, 2973 ) -> None: 2974 self._execute_materialization( 2975 table_name=table_name, 2976 query_or_df=query_or_df, 2977 model=model, 2978 is_first_insert=is_first_insert, 2979 render_kwargs=render_kwargs, 2980 **kwargs, 2981 ) 2982 2983 # Apply grants after custom materialization insert (only on first insert) 2984 if is_first_insert: 2985 is_snapshot_deployable = kwargs.get("is_snapshot_deployable", False) 2986 self._apply_grants( 2987 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 2988 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
2990 def append( 2991 self, 2992 table_name: str, 2993 query_or_df: QueryOrDF, 2994 model: Model, 2995 render_kwargs: t.Dict[str, t.Any], 2996 **kwargs: t.Any, 2997 ) -> None: 2998 return self.insert( 2999 table_name, 3000 query_or_df, 3001 model, 3002 is_first_insert=False, 3003 render_kwargs=render_kwargs, 3004 **kwargs, 3005 )
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
3007 def run_pre_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 3008 # in dbt custom materialisations it's up to the user to run the pre hooks inside the transaction 3009 if not render_kwargs.get("inside_transaction", True): 3010 super().run_pre_statements( 3011 snapshot=snapshot, 3012 render_kwargs=render_kwargs, 3013 )
Executes the snapshot's pre statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
3015 def run_post_statements(self, snapshot: Snapshot, render_kwargs: t.Any) -> None: 3016 # in dbt custom materialisations it's up to the user to run the post hooks inside the transaction 3017 if not render_kwargs.get("inside_transaction", True): 3018 super().run_post_statements( 3019 snapshot=snapshot, 3020 render_kwargs=render_kwargs, 3021 )
Executes the snapshot's post statements.
Arguments:
- snapshot: The target snapshot.
- render_kwargs: Additional key-value arguments to pass when rendering the statements.
Inherited Members
3090class EngineManagedStrategy(MaterializableStrategy): 3091 def create( 3092 self, 3093 table_name: str, 3094 model: Model, 3095 is_table_deployable: bool, 3096 render_kwargs: t.Dict[str, t.Any], 3097 skip_grants: bool, 3098 **kwargs: t.Any, 3099 ) -> None: 3100 is_snapshot_deployable: bool = kwargs["is_snapshot_deployable"] 3101 3102 if is_table_deployable and is_snapshot_deployable: 3103 # We could deploy this to prod; create a proper managed table 3104 logger.info("Creating managed table: %s", table_name) 3105 self.adapter.create_managed_table( 3106 table_name=table_name, 3107 query=model.render_query_or_raise(**render_kwargs), 3108 target_columns_to_types=model.columns_to_types, 3109 partitioned_by=model.partitioned_by, 3110 clustered_by=model.clustered_by, # type: ignore[arg-type] 3111 table_properties=kwargs.get("physical_properties", model.physical_properties), 3112 table_description=model.description, 3113 column_descriptions=model.column_descriptions, 3114 table_format=model.table_format, 3115 ) 3116 3117 # Apply grants after managed table creation 3118 if not skip_grants: 3119 self._apply_grants( 3120 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3121 ) 3122 3123 elif not is_table_deployable: 3124 # Only create the dev preview table as a normal table. 3125 # For the main table, if the snapshot is cant be deployed to prod (eg upstream is forward-only) do nothing. 3126 # Any downstream models that reference it will be updated to point to the dev preview table. 3127 # If the user eventually tries to deploy it, the logic in insert() will see it doesnt exist and create it 3128 super().create( 3129 table_name=table_name, 3130 model=model, 3131 is_table_deployable=is_table_deployable, 3132 render_kwargs=render_kwargs, 3133 skip_grants=skip_grants, 3134 **kwargs, 3135 ) 3136 3137 def insert( 3138 self, 3139 table_name: str, 3140 query_or_df: QueryOrDF, 3141 model: Model, 3142 is_first_insert: bool, 3143 render_kwargs: t.Dict[str, t.Any], 3144 **kwargs: t.Any, 3145 ) -> None: 3146 deployability_index: DeployabilityIndex = kwargs["deployability_index"] 3147 snapshot: Snapshot = kwargs["snapshot"] 3148 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 3149 if is_first_insert and is_snapshot_deployable and not self.adapter.table_exists(table_name): 3150 self.adapter.create_managed_table( 3151 table_name=table_name, 3152 query=query_or_df, # type: ignore 3153 target_columns_to_types=model.columns_to_types, 3154 partitioned_by=model.partitioned_by, 3155 clustered_by=model.clustered_by, # type: ignore[arg-type] 3156 table_properties=kwargs.get("physical_properties", model.physical_properties), 3157 table_description=model.description, 3158 column_descriptions=model.column_descriptions, 3159 table_format=model.table_format, 3160 ) 3161 self._apply_grants( 3162 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3163 ) 3164 elif not is_snapshot_deployable: 3165 # Snapshot isnt deployable; update the preview table instead 3166 # If the snapshot was deployable, then data would have already been loaded in create() because a managed table would have been created 3167 logger.info( 3168 "Updating preview table: %s (for managed model: %s)", 3169 table_name, 3170 model.name, 3171 ) 3172 self._replace_query_for_model( 3173 model=model, 3174 name=table_name, 3175 query_or_df=query_or_df, 3176 render_kwargs=render_kwargs, 3177 **kwargs, 3178 ) 3179 3180 def append( 3181 self, 3182 table_name: str, 3183 query_or_df: QueryOrDF, 3184 model: Model, 3185 render_kwargs: t.Dict[str, t.Any], 3186 **kwargs: t.Any, 3187 ) -> None: 3188 raise ConfigError(f"Cannot append to a managed table '{table_name}'.") 3189 3190 def migrate( 3191 self, 3192 target_table_name: str, 3193 source_table_name: str, 3194 snapshot: Snapshot, 3195 *, 3196 ignore_destructive: bool, 3197 ignore_additive: bool, 3198 **kwargs: t.Any, 3199 ) -> None: 3200 potential_alter_operations = self.adapter.get_alter_operations( 3201 target_table_name, 3202 source_table_name, 3203 ignore_destructive=ignore_destructive, 3204 ignore_additive=ignore_additive, 3205 ) 3206 if len(potential_alter_operations) > 0: 3207 # this can happen if a user changes a managed model and deliberately overrides a plan to be forward only, eg `sqlmesh plan --forward-only` 3208 raise MigrationNotSupportedError( 3209 f"The schema of the managed model '{target_table_name}' cannot be updated in a forward-only fashion." 3210 ) 3211 3212 # Apply grants after verifying no schema changes 3213 deployability_index = kwargs.get("deployability_index") 3214 is_snapshot_deployable = ( 3215 deployability_index.is_deployable(snapshot) if deployability_index else False 3216 ) 3217 self._apply_grants( 3218 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3219 ) 3220 3221 def delete(self, name: str, **kwargs: t.Any) -> None: 3222 # a dev preview table is created as a normal table, so it needs to be dropped as a normal table 3223 _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 3224 if kwargs["is_table_deployable"]: 3225 self.adapter.drop_managed_table(name) 3226 logger.info("Dropped managed table '%s'", name) 3227 else: 3228 self.adapter.drop_table(name) 3229 logger.info("Dropped dev preview for managed table '%s'", name)
Helper class that provides a standard way to create an ABC using inheritance.
3091 def create( 3092 self, 3093 table_name: str, 3094 model: Model, 3095 is_table_deployable: bool, 3096 render_kwargs: t.Dict[str, t.Any], 3097 skip_grants: bool, 3098 **kwargs: t.Any, 3099 ) -> None: 3100 is_snapshot_deployable: bool = kwargs["is_snapshot_deployable"] 3101 3102 if is_table_deployable and is_snapshot_deployable: 3103 # We could deploy this to prod; create a proper managed table 3104 logger.info("Creating managed table: %s", table_name) 3105 self.adapter.create_managed_table( 3106 table_name=table_name, 3107 query=model.render_query_or_raise(**render_kwargs), 3108 target_columns_to_types=model.columns_to_types, 3109 partitioned_by=model.partitioned_by, 3110 clustered_by=model.clustered_by, # type: ignore[arg-type] 3111 table_properties=kwargs.get("physical_properties", model.physical_properties), 3112 table_description=model.description, 3113 column_descriptions=model.column_descriptions, 3114 table_format=model.table_format, 3115 ) 3116 3117 # Apply grants after managed table creation 3118 if not skip_grants: 3119 self._apply_grants( 3120 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3121 ) 3122 3123 elif not is_table_deployable: 3124 # Only create the dev preview table as a normal table. 3125 # For the main table, if the snapshot is cant be deployed to prod (eg upstream is forward-only) do nothing. 3126 # Any downstream models that reference it will be updated to point to the dev preview table. 3127 # If the user eventually tries to deploy it, the logic in insert() will see it doesnt exist and create it 3128 super().create( 3129 table_name=table_name, 3130 model=model, 3131 is_table_deployable=is_table_deployable, 3132 render_kwargs=render_kwargs, 3133 skip_grants=skip_grants, 3134 **kwargs, 3135 )
Creates the target table or view.
Note that the intention here is to just create the table structure, data is loaded in insert() and append()
Arguments:
- table_name: The name of a table or a view.
- model: The target model.
- is_table_deployable: True if this creation request is for the "main" table that might be deployed to a production environment. False if this creation request is for the "dev preview" table. Note that this flag is not related to the DeployabilityIndex which determines if the snapshot is deployable to production or not
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
3137 def insert( 3138 self, 3139 table_name: str, 3140 query_or_df: QueryOrDF, 3141 model: Model, 3142 is_first_insert: bool, 3143 render_kwargs: t.Dict[str, t.Any], 3144 **kwargs: t.Any, 3145 ) -> None: 3146 deployability_index: DeployabilityIndex = kwargs["deployability_index"] 3147 snapshot: Snapshot = kwargs["snapshot"] 3148 is_snapshot_deployable = deployability_index.is_deployable(snapshot) 3149 if is_first_insert and is_snapshot_deployable and not self.adapter.table_exists(table_name): 3150 self.adapter.create_managed_table( 3151 table_name=table_name, 3152 query=query_or_df, # type: ignore 3153 target_columns_to_types=model.columns_to_types, 3154 partitioned_by=model.partitioned_by, 3155 clustered_by=model.clustered_by, # type: ignore[arg-type] 3156 table_properties=kwargs.get("physical_properties", model.physical_properties), 3157 table_description=model.description, 3158 column_descriptions=model.column_descriptions, 3159 table_format=model.table_format, 3160 ) 3161 self._apply_grants( 3162 model, table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3163 ) 3164 elif not is_snapshot_deployable: 3165 # Snapshot isnt deployable; update the preview table instead 3166 # If the snapshot was deployable, then data would have already been loaded in create() because a managed table would have been created 3167 logger.info( 3168 "Updating preview table: %s (for managed model: %s)", 3169 table_name, 3170 model.name, 3171 ) 3172 self._replace_query_for_model( 3173 model=model, 3174 name=table_name, 3175 query_or_df=query_or_df, 3176 render_kwargs=render_kwargs, 3177 **kwargs, 3178 )
Inserts the given query or a DataFrame into the target table or a view.
Arguments:
- table_name: The name of the target table or view.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- is_first_insert: Whether this is the first insert for this version of a model. This value is set to True if no data has been previously inserted into the target table, or when the entire history of the target model has been restated. Note that in the latter case, the table might contain data from previous executions, and it is the responsibility of a specific evaluation strategy to handle the truncation of the table if necessary.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
3180 def append( 3181 self, 3182 table_name: str, 3183 query_or_df: QueryOrDF, 3184 model: Model, 3185 render_kwargs: t.Dict[str, t.Any], 3186 **kwargs: t.Any, 3187 ) -> None: 3188 raise ConfigError(f"Cannot append to a managed table '{table_name}'.")
Appends the given query or a DataFrame to the existing table.
Arguments:
- table_name: The target table name.
- query_or_df: A query or a DataFrame to insert.
- model: The target model.
- render_kwargs: Additional key-value arguments to pass when rendering the model's query.
3190 def migrate( 3191 self, 3192 target_table_name: str, 3193 source_table_name: str, 3194 snapshot: Snapshot, 3195 *, 3196 ignore_destructive: bool, 3197 ignore_additive: bool, 3198 **kwargs: t.Any, 3199 ) -> None: 3200 potential_alter_operations = self.adapter.get_alter_operations( 3201 target_table_name, 3202 source_table_name, 3203 ignore_destructive=ignore_destructive, 3204 ignore_additive=ignore_additive, 3205 ) 3206 if len(potential_alter_operations) > 0: 3207 # this can happen if a user changes a managed model and deliberately overrides a plan to be forward only, eg `sqlmesh plan --forward-only` 3208 raise MigrationNotSupportedError( 3209 f"The schema of the managed model '{target_table_name}' cannot be updated in a forward-only fashion." 3210 ) 3211 3212 # Apply grants after verifying no schema changes 3213 deployability_index = kwargs.get("deployability_index") 3214 is_snapshot_deployable = ( 3215 deployability_index.is_deployable(snapshot) if deployability_index else False 3216 ) 3217 self._apply_grants( 3218 snapshot.model, target_table_name, GrantsTargetLayer.PHYSICAL, is_snapshot_deployable 3219 )
Migrates the target table schema so that it corresponds to the source table schema.
Arguments:
- target_table_name: The target table name.
- source_table_name: The source table name.
- snapshot: The target snapshot.
- ignore_destructive: If True, destructive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
- ignore_additive: If True, additive changes are not created when migrating. This is used for forward-only models that are being migrated to a new version.
3221 def delete(self, name: str, **kwargs: t.Any) -> None: 3222 # a dev preview table is created as a normal table, so it needs to be dropped as a normal table 3223 _check_table_db_is_physical_schema(name, kwargs["physical_schema"]) 3224 if kwargs["is_table_deployable"]: 3225 self.adapter.drop_managed_table(name) 3226 logger.info("Dropped managed table '%s'", name) 3227 else: 3228 self.adapter.drop_table(name) 3229 logger.info("Dropped dev preview for managed table '%s'", name)
Deletes a target table or a view.
Arguments:
- name: The name of a table or a view.