Edit on GitHub

sqlmesh.core.model.common

View Source

  1from __future__ import annotations
  2
  3import ast
  4import typing as t
  5from pathlib import Path
  6
  7from difflib import get_close_matches
  8from sqlglot import exp
  9from sqlglot.helper import ensure_list
 10
 11from sqlmesh.core import constants as c
 12from sqlmesh.core import dialect as d
 13from sqlmesh.core.macros import MacroRegistry, MacroStrTemplate
 14from sqlmesh.utils import str_to_bool
 15from sqlmesh.utils.errors import ConfigError, SQLMeshError, raise_config_error
 16from sqlmesh.utils.metaprogramming import (
 17    Executable,
 18    SqlValue,
 19    build_env,
 20    prepare_env,
 21    serialize_env,
 22)
 23from sqlmesh.utils.pydantic import (
 24    PydanticModel,
 25    ValidationInfo,
 26    field_validator,
 27    get_dialect,
 28    validation_data,
 29)
 30
 31if t.TYPE_CHECKING:
 32    from sqlglot.dialects.dialect import DialectType
 33    from sqlmesh.utils import registry_decorator
 34    from sqlmesh.utils.jinja import MacroReference
 35
 36    MacroCallable = t.Union[Executable, registry_decorator]
 37
 38
 39def make_python_env(
 40    expressions: t.Union[
 41        exp.Expr,
 42        t.List[t.Union[exp.Expr, t.Tuple[exp.Expr, bool]]],
 43    ],
 44    jinja_macro_references: t.Optional[t.Set[MacroReference]],
 45    module_path: Path,
 46    macros: MacroRegistry,
 47    variables: t.Optional[t.Dict[str, t.Any]] = None,
 48    referenced_variables: t.Optional[t.Set[str]] = None,
 49    path: t.Optional[Path] = None,
 50    python_env: t.Optional[t.Dict[str, Executable]] = None,
 51    strict_resolution: bool = True,
 52    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
 53    dialect: DialectType = None,
 54) -> t.Dict[str, Executable]:
 55    python_env = {} if python_env is None else python_env
 56    env: t.Dict[str, t.Tuple[t.Any, t.Optional[bool]]] = {}
 57
 58    variables = variables or {}
 59    blueprint_variables = blueprint_variables or {}
 60
 61    used_macros: t.Dict[str, t.Tuple[MacroCallable, bool]] = {}
 62
 63    # var -> True: var is metadata-only
 64    # var -> False: var is not metadata-only
 65    # var -> None: cannot determine whether var is metadata-only yet, need to walk macros first
 66    used_variables: t.Dict[str, t.Optional[bool]] = dict.fromkeys(
 67        referenced_variables or set(), False
 68    )
 69
 70    # id(expr) -> true: expr appears under the AST of a metadata-only macro function
 71    # id(expr) -> false: expr appears under the AST of a macro function whose metadata status we don't yet know
 72    expr_under_metadata_macro_func: t.Dict[int, bool] = {}
 73
 74    # For @m1(@m2(@x), @y), we'd get x -> m1 and y -> m1
 75    outermost_macro_func_ancestor_by_var: t.Dict[str, str] = {}
 76    visited_macro_funcs: t.Set[int] = set()
 77
 78    def _is_metadata_var(
 79        name: str, expression: exp.Expr, appears_in_metadata_expression: bool
 80    ) -> t.Optional[bool]:
 81        is_metadata_so_far = used_variables.get(name, True)
 82        if is_metadata_so_far is False:
 83            # We've concluded this variable is definitely not metadata-only
 84            return False
 85
 86        appears_under_metadata_macro_func = expr_under_metadata_macro_func.get(id(expression))
 87        if is_metadata_so_far and (
 88            appears_in_metadata_expression or appears_under_metadata_macro_func
 89        ):
 90            # The variable appears in a metadata expression, e.g., audits (...),
 91            # or in the AST of metadata-only macro call, e.g., @FOO(@x)
 92            return True
 93
 94        # The variable appears in the AST of a macro call, but we don't know if it's metadata-only
 95        if appears_under_metadata_macro_func is False:
 96            return None
 97
 98        # The variable appears elsewhere, e.g., in the model's query: SELECT @x
 99        return False
100
101    def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool:
102        if name in used_macros:
103            is_metadata_so_far = used_macros[name][1]
104            return is_metadata_so_far and appears_in_metadata_expression
105
106        return appears_in_metadata_expression
107
108    expressions = ensure_list(expressions)
109    for expression_metadata in expressions:
110        if isinstance(expression_metadata, tuple):
111            expression, is_metadata = expression_metadata
112        else:
113            expression, is_metadata = expression_metadata, False
114
115        if isinstance(expression, d.Jinja):
116            continue
117
118        for macro_func_or_var in expression.find_all(d.MacroFunc, d.MacroVar, exp.Identifier):
119            if macro_func_or_var.__class__ is d.MacroFunc:
120                name = macro_func_or_var.this.name.lower()
121                if name not in macros:
122                    continue
123
124                used_macros[name] = (macros[name], _is_metadata_macro(name, is_metadata))
125
126                if name in (c.VAR, c.BLUEPRINT_VAR):
127                    args = macro_func_or_var.this.expressions
128                    if len(args) < 1:
129                        raise_config_error(
130                            f"Macro {name.upper()} requires at least one argument", path
131                        )
132
133                    if not args[0].is_string:
134                        raise_config_error(
135                            f"The variable name must be a string literal, '{args[0].sql()}' was given instead",
136                            path,
137                        )
138
139                    var_name = args[0].this.lower()
140                    used_variables[var_name] = _is_metadata_var(
141                        var_name, macro_func_or_var, is_metadata
142                    )
143                elif id(macro_func_or_var) not in visited_macro_funcs:
144                    # We only care about the top-level macro function calls to determine the metadata
145                    # status of the variables referenced in their ASTs. For example, in @m1(@m2(@x)),
146                    # if m1 is metadata-only but m2 is not, we can still determine that @x only affects
147                    # the metadata hash, since m2's result feeds into a metadata-only macro function.
148                    #
149                    # Generally, if the top-level call is known to be metadata-only or appear in a
150                    # metadata expression, then we can avoid traversing nested macro function calls.
151
152                    var_refs, _expr_under_metadata_macro_func, _visited_macro_funcs = (
153                        _extract_macro_func_variable_references(macro_func_or_var, is_metadata)
154                    )
155                    expr_under_metadata_macro_func.update(_expr_under_metadata_macro_func)
156                    visited_macro_funcs.update(_visited_macro_funcs)
157                    outermost_macro_func_ancestor_by_var |= {var_ref: name for var_ref in var_refs}
158            elif macro_func_or_var.__class__ is d.MacroVar:
159                var_name = macro_func_or_var.name.lower()
160                if var_name in macros:
161                    used_macros[var_name] = (
162                        macros[var_name],
163                        _is_metadata_macro(var_name, is_metadata),
164                    )
165                elif var_name in variables or var_name in blueprint_variables:
166                    used_variables[var_name] = _is_metadata_var(
167                        var_name, macro_func_or_var, is_metadata
168                    )
169            elif (
170                isinstance(macro_func_or_var, (exp.Identifier, d.MacroStrReplace, d.MacroSQL))
171            ) and "@" in macro_func_or_var.name:
172                for _, identifier, braced_identifier, _ in MacroStrTemplate.pattern.findall(
173                    macro_func_or_var.name
174                ):
175                    var_name = braced_identifier or identifier
176                    if var_name in variables or var_name in blueprint_variables:
177                        used_variables[var_name] = _is_metadata_var(
178                            var_name, macro_func_or_var, is_metadata
179                        )
180
181    for macro_ref in jinja_macro_references or set():
182        if macro_ref.package is None and macro_ref.name in macros:
183            used_macros[macro_ref.name] = (macros[macro_ref.name], False)
184
185    for name, (used_macro, is_metadata) in used_macros.items():
186        if isinstance(used_macro, Executable):
187            python_env[name] = used_macro
188        elif not hasattr(used_macro, c.SQLMESH_BUILTIN) and name not in python_env:
189            build_env(
190                used_macro.func,
191                env=env,
192                name=name,
193                path=module_path,
194                is_metadata_obj=is_metadata,
195            )
196
197    python_env.update(serialize_env(env, path=module_path))
198    return _add_variables_to_python_env(
199        python_env,
200        used_variables,
201        variables,
202        blueprint_variables=blueprint_variables,
203        dialect=dialect,
204        strict_resolution=strict_resolution,
205        outermost_macro_func_ancestor_by_var=outermost_macro_func_ancestor_by_var,
206    )
207
208
def _extract_macro_func_variable_references(
    macro_func: exp.Expr,
    is_metadata: bool,
) -> t.Tuple[t.Set[str], t.Dict[int, bool], t.Set[int]]:
    """Collects the variable references found anywhere in the AST of `macro_func`.

    Args:
        macro_func: The macro function call whose AST is walked.
        is_metadata: The flag recorded for every node that references a variable.

    Returns:
        A tuple of: the lowercased names of the referenced variables, a mapping from
        `id(node)` to `is_metadata` for each referencing node, and the ids of all the
        `d.MacroFunc` nodes that were visited.
    """
    referenced_names: t.Set[str] = set()
    flag_by_node_id: t.Dict[int, bool] = {}
    seen_macro_func_ids: t.Set[int] = set()

    for node in macro_func.walk():
        node_id = id(node)

        if type(node) is d.MacroFunc:
            seen_macro_func_ids.add(node_id)

            call = node.this
            call_args = call.expressions
            is_var_lookup = call.name.lower() in (c.VAR, c.BLUEPRINT_VAR)

            # Only @VAR('name') / @BLUEPRINT_VAR('name') with a string literal name count.
            if is_var_lookup and call_args and call_args[0].is_string:
                referenced_names.add(call_args[0].this.lower())
                flag_by_node_id[node_id] = is_metadata
            continue

        if isinstance(node, d.MacroVar):
            referenced_names.add(node.name.lower())
            flag_by_node_id[node_id] = is_metadata
            continue

        if isinstance(node, (exp.Identifier, d.MacroStrReplace, d.MacroSQL)) and "@" in node.name:
            # Template-style references embedded in the node's name, e.g. "@{x}" or "@x".
            for _, plain_name, braced_name, _ in MacroStrTemplate.pattern.findall(node.name):
                referenced_names.add((braced_name or plain_name).lower())
            flag_by_node_id[node_id] = is_metadata

    return (referenced_names, flag_by_node_id, seen_macro_func_ids)
238
239
def _add_variables_to_python_env(
    python_env: t.Dict[str, Executable],
    used_variables: t.Dict[str, t.Optional[bool]],
    variables: t.Optional[t.Dict[str, t.Any]],
    strict_resolution: bool = True,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
    dialect: DialectType = None,
    outermost_macro_func_ancestor_by_var: t.Optional[t.Dict[str, str]] = None,
) -> t.Dict[str, Executable]:
    """Stores the used variables in `python_env`, split into metadata and non-metadata groups.

    Args:
        python_env: The python environment. It is updated in place and returned.
        used_variables: The metadata status of each variable referenced so far (True, False
            or None when still undetermined). It is updated in place with the variables that
            are referenced by the definitions in `python_env`.
        variables: The available variables.
        strict_resolution: Forwarded to `parse_dependencies`.
        blueprint_variables: The available blueprint variables. Expression values are stored
            as `SqlValue`s rendered with `dialect`.
        dialect: The dialect used to render blueprint variable expressions.
        outermost_macro_func_ancestor_by_var: For variables referenced within a macro call's
            AST, the name of the outermost macro function of that call.

    Returns:
        The updated python environment.
    """
    _, python_used_variables = parse_dependencies(
        python_env,
        None,
        strict_resolution=strict_resolution,
        variables=variables,
        blueprint_variables=blueprint_variables,
    )
    for var_name, is_metadata in python_used_variables.items():
        used_variables[var_name] = is_metadata and used_variables.get(var_name, True)

    # Variables are treated as metadata-only when all of their references either:
    # - appear in metadata-only expressions, such as `audits (...)`, virtual statements, etc
    # - appear in the ASTs or definitions of metadata-only macros
    #
    # See also: https://github.com/SQLMesh/sqlmesh/pull/4936#issuecomment-3136339936,
    # specifically the "Terminology" and "Observations" section.
    metadata_used_variables = {
        var_name for var_name, is_metadata in used_variables.items() if is_metadata
    }
    for used_var, outermost_macro_func in (outermost_macro_func_ancestor_by_var or {}).items():
        used_var_is_metadata = used_variables.get(used_var)
        if used_var_is_metadata is False:
            continue

        # At this point we can decide whether a variable reference in a macro call's AST is
        # metadata-only, because we've annotated the corresponding macro call in the python env.
        if outermost_macro_func in python_env and python_env[outermost_macro_func].is_metadata:
            metadata_used_variables.add(used_var)

    non_metadata_used_variables = set(used_variables) - metadata_used_variables

    # Sanity check: the two sets are disjoint by construction.
    if overlapping_variables := (non_metadata_used_variables & metadata_used_variables):
        raise ConfigError(
            f"Variables {', '.join(overlapping_variables)} are both metadata and non-metadata, "
            "which is unexpected. Please file an issue at https://github.com/SQLMesh/sqlmesh/issues/new."
        )

    metadata_variables = {
        k: v for k, v in (variables or {}).items() if k in metadata_used_variables
    }
    variables = {k: v for k, v in (variables or {}).items() if k in non_metadata_used_variables}

    if variables:
        python_env[c.SQLMESH_VARS] = Executable.value(variables, sort_root_dict=True)
    if metadata_variables:
        python_env[c.SQLMESH_VARS_METADATA] = Executable.value(
            metadata_variables, sort_root_dict=True, is_metadata=True
        )

    if blueprint_variables:
        # Keys are lowercased in both groups so that a blueprint variable is stored under
        # the same key regardless of whether it ends up being metadata-only or not.
        metadata_blueprint_variables = {
            k.lower(): SqlValue(sql=v.sql(dialect=dialect)) if isinstance(v, exp.Expr) else v
            for k, v in blueprint_variables.items()
            if k in metadata_used_variables
        }
        blueprint_variables = {
            k.lower(): SqlValue(sql=v.sql(dialect=dialect)) if isinstance(v, exp.Expr) else v
            for k, v in blueprint_variables.items()
            if k in non_metadata_used_variables
        }
        if blueprint_variables:
            python_env[c.SQLMESH_BLUEPRINT_VARS] = Executable.value(
                blueprint_variables, sort_root_dict=True
            )
        if metadata_blueprint_variables:
            python_env[c.SQLMESH_BLUEPRINT_VARS_METADATA] = Executable.value(
                metadata_blueprint_variables, sort_root_dict=True, is_metadata=True
            )

    return python_env
319
320
def parse_dependencies(
    python_env: t.Dict[str, Executable],
    entrypoint: t.Optional[str],
    strict_resolution: bool = True,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Tuple[t.Set[str], t.Dict[str, bool]]:
    """
    Parses the source of a model function and finds upstream table dependencies
    and referenced variables based on calls to context / evaluator.

    Args:
        python_env: A dictionary of Python definitions.
        entrypoint: The name of the function.
        strict_resolution: If true, the arguments of `table` and `resolve_table` calls must
            be resolvable at parse time, otherwise an exception will be raised.
        variables: The variables available to the python environment.
        blueprint_variables: The blueprint variables available to the python environment.

    Returns:
        A tuple containing the set of upstream table dependencies and a mapping of
        the referenced variables associated with their metadata status.

    Raises:
        ConfigError: If `strict_resolution` is true and the first argument of a relevant
            call is missing or can't be evaluated at parse time.
    """

    class VariableResolutionContext:
        """This enables calls like `resolve_table` to reference `var()` and `blueprint_var()`."""

        @staticmethod
        def var(var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
            return (variables or {}).get(var_name.lower(), default)

        @staticmethod
        def blueprint_var(var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
            return (blueprint_variables or {}).get(var_name.lower(), default)

    env = prepare_env(python_env)
    local_env = dict.fromkeys(("context", "evaluator"), VariableResolutionContext)

    depends_on = set()
    used_variables: t.Dict[str, bool] = {}

    for executable in python_env.values():
        if not executable.is_definition:
            continue

        is_metadata = executable.is_metadata
        for node in ast.walk(ast.parse(executable.payload)):
            next_variables = set()

            if isinstance(node, ast.Call):
                func = node.func
                if not isinstance(func, ast.Attribute) or not isinstance(func.value, ast.Name):
                    continue

                def get_first_arg(keyword_arg_name: str) -> t.Any:
                    """Evaluates the first positional argument, or the keyword `keyword_arg_name`."""
                    if node.args:
                        first_arg: t.Optional[ast.expr] = node.args[0]
                    else:
                        first_arg = next(
                            (
                                keyword.value
                                for keyword in node.keywords
                                if keyword.arg == keyword_arg_name
                            ),
                            None,
                        )

                    # Fall back to the parameter name so that the error message below is
                    # well-defined even if unparsing fails, e.g. when the argument is missing.
                    expression = keyword_arg_name
                    try:
                        expression = ast.unparse(t.cast(ast.expr, first_arg))
                        # NOTE: this evaluates source code taken from the python env, so the
                        # python env must only contain trusted (project) code.
                        return eval(expression, env, local_env)
                    except Exception as ex:
                        if strict_resolution:
                            raise ConfigError(
                                f"Error resolving dependencies for '{executable.path}'. "
                                f"Argument '{expression.strip()}' must be resolvable at parse time."
                            ) from ex

                    # Non-strict resolution: the argument couldn't be resolved.
                    return None

                if func.value.id == "context" and func.attr in ("table", "resolve_table"):
                    depends_on.add(get_first_arg("model_name"))
                elif func.value.id in ("context", "evaluator") and func.attr in (
                    c.VAR,
                    c.BLUEPRINT_VAR,
                ):
                    referenced_var = get_first_arg("var_name")
                    # An unresolved name (possible when strict_resolution is false) can't be
                    # tracked, so it's skipped instead of failing on `None.lower()`.
                    if referenced_var is not None:
                        next_variables.add(referenced_var.lower())
            elif (
                isinstance(node, ast.Attribute)
                and isinstance(node.value, ast.Name)
                and node.value.id in ("context", "evaluator")
                and node.attr == c.GATEWAY
            ):
                # Check whether the gateway attribute is referenced.
                next_variables.add(c.GATEWAY)
            elif isinstance(node, ast.FunctionDef) and node.name == entrypoint:
                # Every entrypoint parameter other than `context` is recorded as a variable.
                next_variables.update(
                    [
                        arg.arg
                        for arg in [*node.args.args, *node.args.kwonlyargs]
                        if arg.arg != "context"
                    ]
                )

            # A variable remains metadata-only only if all of its references are metadata.
            for var_name in next_variables:
                used_variables[var_name] = used_variables.get(var_name, True) and bool(is_metadata)

    return depends_on, used_variables
426
427
def validate_extra_and_required_fields(
    klass: t.Type[PydanticModel],
    provided_fields: t.Set[str],
    entity_name: str,
    path: t.Optional[Path] = None,
) -> None:
    """Reports missing required fields and unknown fields for `klass`.

    Args:
        klass: The model class that knows its required, extra and full set of fields.
        provided_fields: The names of the fields that were provided.
        entity_name: The name of the entity, used in the error messages.
        path: The path reported alongside the error.
    """

    def _quoted(names: t.Iterable[str]) -> str:
        # Renders names as: 'a', 'b', 'c'
        return "'" + "', '".join(names) + "'"

    def _plural(items: t.Sized) -> str:
        return "s" if len(items) > 1 else ""

    missing = klass.missing_required_fields(provided_fields)
    if missing:
        raise_config_error(
            f"Please add required field{_plural(missing)} {_quoted(missing)} to the {entity_name}.",
            path,
        )

    unknown = klass.extra_fields(provided_fields)
    if unknown:
        unknown_names = _quoted(unknown)

        known_fields = klass.all_fields()
        # Best suggestion (if any) for each unknown field.
        suggestions = {
            field: found[0]
            for field in unknown
            if (found := get_close_matches(field, known_fields, n=1))
        }

        if len(suggestions) == 1:
            similar_msg = ". Did you mean " + _quoted(suggestions.values()) + "?"
        elif suggestions:
            hints = [f"- {field}: Did you mean '{match}'?" for field, match in suggestions.items()]
            similar_msg = "\n\n  " + "\n  ".join(hints)
        else:
            similar_msg = ""

        raise_config_error(
            f"Invalid field name{_plural(unknown)} present in the {entity_name}: {unknown_names}{similar_msg}",
            path,
        )
465
466
def single_value_or_tuple(values: t.Sequence) -> exp.Identifier | exp.Tuple:
    """Returns an identifier for a single value, otherwise a tuple of identifiers."""
    if len(values) == 1:
        return exp.to_identifier(values[0])

    identifiers = [exp.to_identifier(value) for value in values]
    return exp.Tuple(expressions=identifiers)
473
474
def parse_expression(
    cls: t.Type,
    v: t.Union[t.List[str], t.List[exp.Expr], str, exp.Expr, t.Callable, None],
    info: t.Optional[ValidationInfo],
) -> t.List[exp.Expr] | exp.Expr | t.Callable | None:
    """Helper method to deserialize SQLGlot expressions in Pydantic Models.

    `None` and callables are returned untouched. Strings (and strings within lists) are
    parsed with the dialect found in the validation data; `exp.Semicolon` list items are
    dropped. Any other falsy value raises a `ConfigError`.
    """
    if v is None or callable(v):
        return v

    if info:
        dialect = validation_data(info).get("dialect")
    else:
        dialect = ""

    if isinstance(v, list):
        parsed: t.List[exp.Expr] = []
        for item in v:
            if isinstance(item, exp.Semicolon):
                continue
            if isinstance(item, exp.Expr):
                parsed.append(item)
            else:
                parsed.append(d.parse_one(item, dialect=dialect))  # type: ignore[arg-type]
        return parsed

    if isinstance(v, str):
        return d.parse_one(v, dialect=dialect)

    if v:
        return v

    raise ConfigError(f"Could not parse {v}")
503
504
def parse_bool(v: t.Any) -> bool:
    """Converts a value or a SQLGlot expression into a boolean."""
    if not isinstance(v, exp.Expr):
        return str_to_bool(str(v or ""))

    if isinstance(v, exp.Boolean):
        return v.this

    from sqlglot.optimizer.simplify import simplify

    # Try to reduce expressions like (1 = 1) (see: T-SQL boolean generation)
    reduced = simplify(v)
    if isinstance(reduced, exp.Boolean):
        return reduced.this

    return str_to_bool(reduced.name)
519
520
def parse_properties(
    cls: t.Type, v: t.Any, info: t.Optional[ValidationInfo]
) -> t.Optional[exp.Tuple]:
    """Converts properties given as a string, expression or dict into a tuple of `key = value`.

    The dialect found in the validation data is stored in the result's `meta["dialect"]`.
    Raises a `ConfigError` if an entry is not an equality and a `SQLMeshError` if the value
    has an unsupported type.
    """
    if v is None:
        return v

    if info:
        dialect = validation_data(info).get("dialect")
    else:
        dialect = ""

    if isinstance(v, str):
        v = d.parse_one(v, dialect=dialect)

    if isinstance(v, (exp.Array, exp.Paren, exp.Tuple)):
        if isinstance(v, exp.Paren):
            entries: t.List[exp.Expr] = [v.unnest()]
        else:
            entries = v.expressions

        # Report the first entry that isn't of the form <key> = <value>.
        invalid = next((entry for entry in entries if not isinstance(entry, exp.EQ)), None)
        if invalid is not None:
            raise ConfigError(
                f"Invalid property '{invalid.sql(dialect=dialect)}'. "
                "Properties must be specified as key-value pairs <key> = <value>. "
            )

        if isinstance(v, (exp.Paren, exp.Array)):
            properties = exp.Tuple(expressions=entries)
        else:
            properties = v
    elif isinstance(v, dict):
        pairs = [exp.Literal.string(name).eq(item) for name, item in v.items()]
        properties = exp.Tuple(expressions=pairs)
    else:
        raise SQLMeshError(f"Unexpected properties '{v}'")

    properties.meta["dialect"] = dialect
    return properties
555
556
def default_catalog(cls: t.Type, v: t.Any) -> t.Optional[str]:
    """Returns the string form of `v`, or None when no value was given."""
    # If v is an expression then we will return expression as sql without a dialect
    return None if v is None else str(v)
562
563
def depends_on(cls: t.Type, v: t.Any, info: ValidationInfo) -> t.Optional[t.Set[str]]:
    """Normalizes the model names in `v` using the dialect and default catalog in `info`.

    Arrays, tuples, single tables / columns and non-string iterables are converted into a
    set of normalized names. Any other value is returned as is.
    """
    data = validation_data(info)
    dialect = data.get("dialect")
    catalog = data.get("default_catalog")

    def _normalize(name: t.Any) -> str:
        return d.normalize_model_name(name, default_catalog=catalog, dialect=dialect)

    value = v.unnest() if isinstance(v, exp.Paren) else v

    if isinstance(value, (exp.Array, exp.Tuple)):
        # String literals are normalized by their name, everything else as an expression.
        return {
            _normalize(table.name if table.is_string else table) for table in value.expressions
        }

    if isinstance(value, (exp.Table, exp.Column)):
        return {_normalize(value)}

    if hasattr(value, "__iter__") and not isinstance(value, str):
        return {_normalize(name) for name in value}

    return value
590
591
def sort_python_env(python_env: t.Dict[str, Executable]) -> t.List[t.Tuple[str, Executable]]:
    """Returns the python env items ordered by executable kind and then by name."""

    def _kind_then_name(entry: t.Tuple[str, Executable]) -> t.Tuple[t.Any, str]:
        name, executable = entry
        return (executable.kind, name)

    ordered = list(python_env.items())
    ordered.sort(key=_kind_then_name)
    return ordered
595
596
def sorted_python_env_payloads(python_env: t.Dict[str, Executable]) -> t.List[str]:
    """Returns the payloads of the sorted python env."""
    payloads: t.List[str] = []

    for name, executable in sort_python_env(python_env):
        # The originating path, when known, is emitted as a leading comment line.
        header = "" if executable.path is None else f"# {executable.path}\n"

        if executable.is_import or executable.is_definition:
            body = executable.payload
        else:
            # Anything else is rendered as an assignment to its name.
            body = f"{name} = {executable.payload}"

        payloads.append(header + body)

    return payloads
609
610
def parse_strings_with_macro_refs(value: t.Any, dialect: DialectType) -> t.Any:
    """Recursively parses the strings that contain "@" within `value`.

    Dictionaries are updated in place, lists are rebuilt, and any other value (including
    strings without "@") is returned unchanged.
    """
    if isinstance(value, str):
        if "@" in value:
            return exp.maybe_parse(value, dialect=dialect)
        return value

    if isinstance(value, dict):
        # Iterate over a snapshot, since the dictionary is being assigned to.
        for key, item in list(value.items()):
            value[key] = parse_strings_with_macro_refs(item, dialect)
        return value

    if isinstance(value, list):
        return [parse_strings_with_macro_refs(item, dialect) for item in value]

    return value
622
623
def _before_validator(func: t.Callable, *fields: str) -> t.Callable:
    """Registers `func` for `fields` via `field_validator` with mode="before" and check_fields=False."""
    return field_validator(*fields, mode="before", check_fields=False)(func)


# Deserializes SQLGlot expressions.
expression_validator: t.Callable = _before_validator(parse_expression, "unique_key")


# Coerces flag-like fields into booleans.
bool_validator: t.Callable = _before_validator(
    parse_bool,
    "skip",
    "blocking",
    "forward_only",
    "disable_restatement",
    "insert_overwrite",
    "allow_partials",
    "enabled",
    "optimize_query",
    "formatting",
)


# Converts property fields into tuples of `key = value` expressions.
properties_validator: t.Callable = _before_validator(
    parse_properties,
    "physical_properties_",
    "virtual_properties_",
    "materialization_properties_",
    "grants_",
)


# Stringifies the default catalog.
default_catalog_validator: t.Callable = _before_validator(default_catalog, "default_catalog")


# Normalizes the names of the explicitly declared dependencies.
depends_on_validator: t.Callable = _before_validator(depends_on, "depends_on_")
668
669
class ParsableSql(PydanticModel):
    """A SQL string along with a cache of its most recently parsed expression."""

    sql: str
    transaction: t.Optional[bool] = None

    # The expression produced by the last parse and the dialect it was parsed with.
    _parsed: t.Optional[exp.Expr] = None
    _parsed_dialect: t.Optional[str] = None

    def parse(self, dialect: str) -> exp.Expr:
        """Parses the SQL with `dialect`, reusing the cached result when the dialect matches."""
        cache_is_stale = self._parsed is None or self._parsed_dialect != dialect
        if cache_is_stale:
            self._parsed = d.parse_one(self.sql, dialect=dialect)
            self._parsed_dialect = dialect
        return self._parsed  # type: ignore[return-value]

    @classmethod
    def from_parsed_expression(
        cls, parsed_expression: exp.Expr, dialect: str, use_meta_sql: bool = False
    ) -> ParsableSql:
        """Creates an instance from an already parsed expression and primes its cache.

        Args:
            parsed_expression: The parsed expression.
            dialect: The dialect used to render the expression's SQL.
            use_meta_sql: Whether to prefer the SQL stored under the expression's "sql" meta
                key (when present and non-empty) over rendering the expression.
        """
        if use_meta_sql:
            sql = parsed_expression.meta.get("sql") or parsed_expression.sql(dialect=dialect)
        else:
            sql = parsed_expression.sql(dialect=dialect)

        instance = cls(sql=sql)
        instance._parsed = parsed_expression
        instance._parsed_dialect = dialect
        return instance

    @classmethod
    def validator(cls) -> classmethod:
        """Returns a "before" validator that coerces raw values into `ParsableSql` instances."""

        def _validate_parsable_sql(
            v: t.Any, info: ValidationInfo
        ) -> t.Optional[t.Union[ParsableSql, t.List[ParsableSql]]]:
            if v is None:
                return v

            if isinstance(v, list):
                dialect = get_dialect(info.data)
                coerced: t.List[ParsableSql] = []
                for item in v:
                    if isinstance(item, str):
                        coerced.append(ParsableSql(sql=item))
                    elif isinstance(item, exp.Expr):
                        coerced.append(
                            ParsableSql.from_parsed_expression(item, dialect, use_meta_sql=False)
                        )
                    else:
                        coerced.append(ParsableSql.parse_obj(item))
                return coerced

            if isinstance(v, str):
                return ParsableSql(sql=v)

            if isinstance(v, exp.Expr):
                return ParsableSql.from_parsed_expression(
                    v, get_dialect(info.data), use_meta_sql=False
                )

            return ParsableSql.parse_obj(v)

        return field_validator(
            "query_",
            "expressions_",
            "pre_statements_",
            "post_statements_",
            "on_virtual_update_",
            mode="before",
            check_fields=False,
        )(_validate_parsable_sql)

def make_python_env( expressions: Union[sqlglot.expressions.core.Expr, List[Union[sqlglot.expressions.core.Expr, Tuple[sqlglot.expressions.core.Expr, bool]]]], jinja_macro_references: Optional[Set[sqlmesh.utils.jinja.MacroReference]], module_path: pathlib.Path, macros: sqlmesh.utils.UniqueKeyDict[str, typing.Union[sqlmesh.utils.metaprogramming.Executable, sqlmesh.core.macros.macro]], variables: Optional[Dict[str, Any]] = None, referenced_variables: Optional[Set[str]] = None, path: Optional[pathlib.Path] = None, python_env: Optional[Dict[str, sqlmesh.utils.metaprogramming.Executable]] = None, strict_resolution: bool = True, blueprint_variables: Optional[Dict[str, Any]] = None, dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType] = None) -> Dict[str, sqlmesh.utils.metaprogramming.Executable]: View Source

 40def make_python_env(
 41    expressions: t.Union[
 42        exp.Expr,
 43        t.List[t.Union[exp.Expr, t.Tuple[exp.Expr, bool]]],
 44    ],
 45    jinja_macro_references: t.Optional[t.Set[MacroReference]],
 46    module_path: Path,
 47    macros: MacroRegistry,
 48    variables: t.Optional[t.Dict[str, t.Any]] = None,
 49    referenced_variables: t.Optional[t.Set[str]] = None,
 50    path: t.Optional[Path] = None,
 51    python_env: t.Optional[t.Dict[str, Executable]] = None,
 52    strict_resolution: bool = True,
 53    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
 54    dialect: DialectType = None,
 55) -> t.Dict[str, Executable]:
 56    python_env = {} if python_env is None else python_env
 57    env: t.Dict[str, t.Tuple[t.Any, t.Optional[bool]]] = {}
 58
 59    variables = variables or {}
 60    blueprint_variables = blueprint_variables or {}
 61
 62    used_macros: t.Dict[str, t.Tuple[MacroCallable, bool]] = {}
 63
 64    # var -> True: var is metadata-only
 65    # var -> False: var is not metadata-only
 66    # var -> None: cannot determine whether var is metadata-only yet, need to walk macros first
 67    used_variables: t.Dict[str, t.Optional[bool]] = dict.fromkeys(
 68        referenced_variables or set(), False
 69    )
 70
 71    # id(expr) -> true: expr appears under the AST of a metadata-only macro function
 72    # id(expr) -> false: expr appears under the AST of a macro function whose metadata status we don't yet know
 73    expr_under_metadata_macro_func: t.Dict[int, bool] = {}
 74
 75    # For @m1(@m2(@x), @y), we'd get x -> m1 and y -> m1
 76    outermost_macro_func_ancestor_by_var: t.Dict[str, str] = {}
 77    visited_macro_funcs: t.Set[int] = set()
 78
 79    def _is_metadata_var(
 80        name: str, expression: exp.Expr, appears_in_metadata_expression: bool
 81    ) -> t.Optional[bool]:
 82        is_metadata_so_far = used_variables.get(name, True)
 83        if is_metadata_so_far is False:
 84            # We've concluded this variable is definitely not metadata-only
 85            return False
 86
 87        appears_under_metadata_macro_func = expr_under_metadata_macro_func.get(id(expression))
 88        if is_metadata_so_far and (
 89            appears_in_metadata_expression or appears_under_metadata_macro_func
 90        ):
 91            # The variable appears in a metadata expression, e.g., audits (...),
 92            # or in the AST of metadata-only macro call, e.g., @FOO(@x)
 93            return True
 94
 95        # The variable appears in the AST of a macro call, but we don't know if it's metadata-only
 96        if appears_under_metadata_macro_func is False:
 97            return None
 98
 99        # The variable appears elsewhere, e.g., in the model's query: SELECT @x
100        return False
101
102    def _is_metadata_macro(name: str, appears_in_metadata_expression: bool) -> bool:
103        if name in used_macros:
104            is_metadata_so_far = used_macros[name][1]
105            return is_metadata_so_far and appears_in_metadata_expression
106
107        return appears_in_metadata_expression
108
109    expressions = ensure_list(expressions)
110    for expression_metadata in expressions:
111        if isinstance(expression_metadata, tuple):
112            expression, is_metadata = expression_metadata
113        else:
114            expression, is_metadata = expression_metadata, False
115
116        if isinstance(expression, d.Jinja):
117            continue
118
119        for macro_func_or_var in expression.find_all(d.MacroFunc, d.MacroVar, exp.Identifier):
120            if macro_func_or_var.__class__ is d.MacroFunc:
121                name = macro_func_or_var.this.name.lower()
122                if name not in macros:
123                    continue
124
125                used_macros[name] = (macros[name], _is_metadata_macro(name, is_metadata))
126
127                if name in (c.VAR, c.BLUEPRINT_VAR):
128                    args = macro_func_or_var.this.expressions
129                    if len(args) < 1:
130                        raise_config_error(
131                            f"Macro {name.upper()} requires at least one argument", path
132                        )
133
134                    if not args[0].is_string:
135                        raise_config_error(
136                            f"The variable name must be a string literal, '{args[0].sql()}' was given instead",
137                            path,
138                        )
139
140                    var_name = args[0].this.lower()
141                    used_variables[var_name] = _is_metadata_var(
142                        var_name, macro_func_or_var, is_metadata
143                    )
144                elif id(macro_func_or_var) not in visited_macro_funcs:
145                    # We only care about the top-level macro function calls to determine the metadata
146                    # status of the variables referenced in their ASTs. For example, in @m1(@m2(@x)),
147                    # if m1 is metadata-only but m2 is not, we can still determine that @x only affects
148                    # the metadata hash, since m2's result feeds into a metadata-only macro function.
149                    #
150                    # Generally, if the top-level call is known to be metadata-only or appear in a
151                    # metadata expression, then we can avoid traversing nested macro function calls.
152
153                    var_refs, _expr_under_metadata_macro_func, _visited_macro_funcs = (
154                        _extract_macro_func_variable_references(macro_func_or_var, is_metadata)
155                    )
156                    expr_under_metadata_macro_func.update(_expr_under_metadata_macro_func)
157                    visited_macro_funcs.update(_visited_macro_funcs)
158                    outermost_macro_func_ancestor_by_var |= {var_ref: name for var_ref in var_refs}
159            elif macro_func_or_var.__class__ is d.MacroVar:
160                var_name = macro_func_or_var.name.lower()
161                if var_name in macros:
162                    used_macros[var_name] = (
163                        macros[var_name],
164                        _is_metadata_macro(var_name, is_metadata),
165                    )
166                elif var_name in variables or var_name in blueprint_variables:
167                    used_variables[var_name] = _is_metadata_var(
168                        var_name, macro_func_or_var, is_metadata
169                    )
170            elif (
171                isinstance(macro_func_or_var, (exp.Identifier, d.MacroStrReplace, d.MacroSQL))
172            ) and "@" in macro_func_or_var.name:
173                for _, identifier, braced_identifier, _ in MacroStrTemplate.pattern.findall(
174                    macro_func_or_var.name
175                ):
176                    var_name = braced_identifier or identifier
177                    if var_name in variables or var_name in blueprint_variables:
178                        used_variables[var_name] = _is_metadata_var(
179                            var_name, macro_func_or_var, is_metadata
180                        )
181
182    for macro_ref in jinja_macro_references or set():
183        if macro_ref.package is None and macro_ref.name in macros:
184            used_macros[macro_ref.name] = (macros[macro_ref.name], False)
185
186    for name, (used_macro, is_metadata) in used_macros.items():
187        if isinstance(used_macro, Executable):
188            python_env[name] = used_macro
189        elif not hasattr(used_macro, c.SQLMESH_BUILTIN) and name not in python_env:
190            build_env(
191                used_macro.func,
192                env=env,
193                name=name,
194                path=module_path,
195                is_metadata_obj=is_metadata,
196            )
197
198    python_env.update(serialize_env(env, path=module_path))
199    return _add_variables_to_python_env(
200        python_env,
201        used_variables,
202        variables,
203        blueprint_variables=blueprint_variables,
204        dialect=dialect,
205        strict_resolution=strict_resolution,
206        outermost_macro_func_ancestor_by_var=outermost_macro_func_ancestor_by_var,
207    )

def parse_dependencies( python_env: Dict[str, sqlmesh.utils.metaprogramming.Executable], entrypoint: Optional[str], strict_resolution: bool = True, variables: Optional[Dict[str, Any]] = None, blueprint_variables: Optional[Dict[str, Any]] = None) -> Tuple[Set[str], Dict[str, bool]]: View Source

def parse_dependencies(
    python_env: t.Dict[str, Executable],
    entrypoint: t.Optional[str],
    strict_resolution: bool = True,
    variables: t.Optional[t.Dict[str, t.Any]] = None,
    blueprint_variables: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Tuple[t.Set[str], t.Dict[str, bool]]:
    """
    Parses the source of a model function and finds upstream table dependencies
    and referenced variables based on calls to context / evaluator.

    Args:
        python_env: A dictionary of Python definitions.
        entrypoint: The name of the function.
        strict_resolution: If true, the arguments of `table` and `resolve_table` calls must
            be resolvable at parse time, otherwise an exception will be raised. If false,
            arguments that can't be resolved are skipped.
        variables: The variables available to the python environment.
        blueprint_variables: The blueprint variables available to the python environment.

    Returns:
        A tuple containing the set of upstream table dependencies and a mapping of
        the referenced variables associated with their metadata status.

    Raises:
        ConfigError: If `strict_resolution` is true and the first argument of a relevant
            call is missing, can't be evaluated, or (for variable lookups) isn't a string.
    """

    class VariableResolutionContext:
        """This enables calls like `resolve_table` to reference `var()` and `blueprint_var()`."""

        @staticmethod
        def var(var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
            return (variables or {}).get(var_name.lower(), default)

        @staticmethod
        def blueprint_var(var_name: str, default: t.Optional[t.Any] = None) -> t.Optional[t.Any]:
            return (blueprint_variables or {}).get(var_name.lower(), default)

    env = prepare_env(python_env)
    local_env = dict.fromkeys(("context", "evaluator"), VariableResolutionContext)

    depends_on: t.Set[str] = set()
    used_variables: t.Dict[str, bool] = {}

    def get_first_arg(call: ast.Call, keyword_arg_name: str, executable: Executable) -> t.Any:
        """Evaluates the first positional argument of the call, or the named keyword argument.

        Returns None when the argument is missing or can't be evaluated and
        `strict_resolution` is false.
        """
        if call.args:
            first_arg: t.Optional[ast.expr] = call.args[0]
        else:
            first_arg = next(
                (keyword.value for keyword in call.keywords if keyword.arg == keyword_arg_name),
                None,
            )

        if first_arg is None:
            # There's nothing to unparse or evaluate, so this is reported explicitly
            # instead of failing while building the error message.
            if strict_resolution:
                raise ConfigError(
                    f"Error resolving dependencies for '{executable.path}'. "
                    f"Argument '{keyword_arg_name}' must be provided and resolvable at parse time."
                )
            return None

        expression = ast.unparse(first_arg)
        try:
            # NOTE: this evaluates source taken from the python env's definitions.
            return eval(expression, env, local_env)
        except Exception as ex:
            if strict_resolution:
                raise ConfigError(
                    f"Error resolving dependencies for '{executable.path}'. "
                    f"Argument '{expression.strip()}' must be resolvable at parse time."
                ) from ex
            return None

    for executable in python_env.values():
        if not executable.is_definition:
            continue

        is_metadata = executable.is_metadata
        for node in ast.walk(ast.parse(executable.payload)):
            next_variables: t.Set[str] = set()

            if isinstance(node, ast.Call):
                func = node.func
                if not isinstance(func, ast.Attribute) or not isinstance(func.value, ast.Name):
                    continue

                if func.value.id == "context" and func.attr in ("table", "resolve_table"):
                    model_name = get_first_arg(node, "model_name", executable)
                    # Unresolved arguments (non-strict mode) must not end up in the result
                    if model_name is not None:
                        depends_on.add(model_name)
                elif func.value.id in ("context", "evaluator") and func.attr in (
                    c.VAR,
                    c.BLUEPRINT_VAR,
                ):
                    var_name = get_first_arg(node, "var_name", executable)
                    if isinstance(var_name, str):
                        next_variables.add(var_name.lower())
                    elif strict_resolution:
                        raise ConfigError(
                            f"Error resolving dependencies for '{executable.path}'. "
                            f"The variable name passed to '{func.attr}' must be a string."
                        )
            elif (
                isinstance(node, ast.Attribute)
                and isinstance(node.value, ast.Name)
                and node.value.id in ("context", "evaluator")
                and node.attr == c.GATEWAY
            ):
                # Check whether the gateway attribute is referenced.
                next_variables.add(c.GATEWAY)
            elif isinstance(node, ast.FunctionDef) and node.name == entrypoint:
                # The entrypoint's arguments other than `context` are treated as variables.
                next_variables.update(
                    arg.arg
                    for arg in [*node.args.args, *node.args.kwonlyargs]
                    if arg.arg != "context"
                )

            # A variable stays metadata-only as long as every reference to it is metadata-only
            for var_name in next_variables:
                used_variables[var_name] = used_variables.get(var_name, True) and bool(is_metadata)

    return depends_on, used_variables

Parses the source of a model function and finds upstream table dependencies and referenced variables based on calls to context / evaluator.

Arguments:

python_env: A dictionary of Python definitions.
entrypoint: The name of the function.
strict_resolution: If true, the arguments of table and resolve_table calls must be resolvable at parse time, otherwise an exception will be raised.
variables: The variables available to the python environment.
blueprint_variables: The blueprint variables available to the python environment.

Returns:

A tuple containing the set of upstream table dependencies and a mapping of the referenced variables associated with their metadata status.

def validate_extra_and_required_fields( klass: Type[sqlmesh.utils.pydantic.PydanticModel], provided_fields: Set[str], entity_name: str, path: Optional[pathlib.Path] = None) -> None: View Source

def validate_extra_and_required_fields(
    klass: t.Type[PydanticModel],
    provided_fields: t.Set[str],
    entity_name: str,
    path: t.Optional[Path] = None,
) -> None:
    """Reports missing required fields and unknown fields through `raise_config_error`.

    Args:
        klass: The class that provides `missing_required_fields`, `extra_fields`
            and `all_fields`.
        provided_fields: The names of the fields that were provided.
        entity_name: The name of the entity used in the error messages.
        path: The path passed to `raise_config_error`.
    """

    def _quote_all(names: t.Iterable[str]) -> str:
        return "'" + "', '".join(names) + "'"

    missing = klass.missing_required_fields(provided_fields)
    if missing:
        raise_config_error(
            f"Please add required field{'s' if len(missing) > 1 else ''} {_quote_all(missing)} to the {entity_name}.",
            path,
        )

    extra = klass.extra_fields(provided_fields)
    if not extra:
        return

    extra_field_names = _quote_all(extra)

    # Suggest the closest known field name for each unknown field, when there is one
    known_fields = klass.all_fields()
    lookups = ((field, get_close_matches(field, known_fields, n=1)) for field in extra)
    suggestions = {field: found[0] for field, found in lookups if found}

    if len(suggestions) == 1:
        similar_msg = ". Did you mean " + _quote_all(suggestions.values()) + "?"
    elif suggestions:
        similar_msg = "\n\n  " + "\n  ".join(
            f"- {field}: Did you mean '{match}'?" for field, match in suggestions.items()
        )
    else:
        similar_msg = ""

    raise_config_error(
        f"Invalid field name{'s' if len(extra) > 1 else ''} present in the {entity_name}: {extra_field_names}{similar_msg}",
        path,
    )

def single_value_or_tuple( values: Sequence) -> sqlglot.expressions.core.Identifier | sqlglot.expressions.query.Tuple: View Source

def single_value_or_tuple(values: t.Sequence) -> exp.Identifier | exp.Tuple:
    """Returns a single identifier for one value, otherwise a tuple of identifiers."""
    if len(values) == 1:
        return exp.to_identifier(values[0])

    identifiers = [exp.to_identifier(value) for value in values]
    return exp.Tuple(expressions=identifiers)

def parse_expression( cls: Type, v: Union[List[str], List[sqlglot.expressions.core.Expr], str, sqlglot.expressions.core.Expr, Callable, NoneType], info: Optional[pydantic_core.core_schema.ValidationInfo]) -> Union[List[sqlglot.expressions.core.Expr], sqlglot.expressions.core.Expr, Callable, NoneType]: View Source

def parse_expression(
    cls: t.Type,
    v: t.Union[t.List[str], t.List[exp.Expr], str, exp.Expr, t.Callable, None],
    info: t.Optional[ValidationInfo],
) -> t.List[exp.Expr] | exp.Expr | t.Callable | None:
    """Helper method to deserialize SQLGlot expressions in Pydantic Models.

    Args:
        cls: The class the validator is attached to. It's not used.
        v: The value to deserialize. None and callables are returned unchanged.
        info: The validation info used to look up the "dialect". An empty dialect
            is used when it's falsy.

    Returns:
        The parsed expression(s), or the value itself if no parsing was needed.

    Raises:
        ConfigError: If the value is falsy and isn't None, a callable, a list or a string.
    """
    if v is None or callable(v):
        return v

    dialect = validation_data(info).get("dialect") if info else ""

    if isinstance(v, str):
        return d.parse_one(v, dialect=dialect)

    if isinstance(v, list):
        parsed: t.List[exp.Expr] = []
        for item in v:
            # Semicolons are dropped from the result
            if isinstance(item, exp.Semicolon):
                continue
            if isinstance(item, exp.Expr):
                parsed.append(item)
            else:
                parsed.append(d.parse_one(item, dialect=dialect))  # type: ignore[misc]
        return parsed

    if not v:
        raise ConfigError(f"Could not parse {v}")

    return v

Helper method to deserialize SQLGlot expressions in Pydantic Models.

def parse_bool(v: Any) -> bool: View Source

def parse_bool(v: t.Any) -> bool:
    """Converts an expression or an arbitrary value into a boolean.

    Values that aren't expressions are converted using `str_to_bool` on their string
    form, with falsy values treated as an empty string.
    """
    if not isinstance(v, exp.Expr):
        return str_to_bool(str(v or ""))

    if not isinstance(v, exp.Boolean):
        from sqlglot.optimizer.simplify import simplify

        # Try to reduce expressions like (1 = 1) (see: T-SQL boolean generation)
        v = simplify(v)

    return v.this if isinstance(v, exp.Boolean) else str_to_bool(v.name)

def parse_properties( cls: Type, v: Any, info: Optional[pydantic_core.core_schema.ValidationInfo]) -> Optional[sqlglot.expressions.query.Tuple]: View Source

def parse_properties(
    cls: t.Type, v: t.Any, info: t.Optional[ValidationInfo]
) -> t.Optional[exp.Tuple]:
    """Converts properties into a tuple of `<key> = <value>` expressions.

    Args:
        cls: The class the validator is attached to. It's not used.
        v: The properties given as a string, an array / paren / tuple expression
            or a dictionary. None is returned unchanged.
        info: The validation info used to look up the "dialect". An empty dialect
            is used when it's falsy.

    Returns:
        The properties tuple with the dialect stored in its meta, or None.

    Raises:
        ConfigError: If one of the properties isn't an equality expression.
        SQLMeshError: If the value has an unexpected type.
    """
    if v is None:
        return v

    dialect = validation_data(info).get("dialect") if info else ""

    if isinstance(v, str):
        v = d.parse_one(v, dialect=dialect)

    if isinstance(v, dict):
        properties = exp.Tuple(
            expressions=[exp.Literal.string(key).eq(value) for key, value in v.items()]
        )
    elif isinstance(v, (exp.Array, exp.Paren, exp.Tuple)):
        # A paren wraps a single property, whereas arrays and tuples hold a list of them
        if isinstance(v, exp.Paren):
            eq_expressions: t.List[exp.Expr] = [v.unnest()]
        else:
            eq_expressions = v.expressions

        for candidate in eq_expressions:
            if not isinstance(candidate, exp.EQ):
                raise ConfigError(
                    f"Invalid property '{candidate.sql(dialect=dialect)}'. "
                    "Properties must be specified as key-value pairs <key> = <value>. "
                )

        if isinstance(v, (exp.Paren, exp.Array)):
            properties = exp.Tuple(expressions=eq_expressions)
        else:
            properties = v
    else:
        raise SQLMeshError(f"Unexpected properties '{v}'")

    properties.meta["dialect"] = dialect
    return properties

def default_catalog(cls: Type, v: Any) -> Optional[str]: View Source

def default_catalog(cls: t.Type, v: t.Any) -> t.Optional[str]:
    """Returns the string form of the value, or None if the value is None."""
    # If v is an expression then we will return expression as sql without a dialect
    return None if v is None else str(v)

def depends_on( cls: Type, v: Any, info: pydantic_core.core_schema.ValidationInfo) -> Optional[Set[str]]: View Source

def depends_on(cls: t.Type, v: t.Any, info: ValidationInfo) -> t.Optional[t.Set[str]]:
    """Normalizes the given dependencies into a set of model names.

    Args:
        cls: The class the validator is attached to. It's not used.
        v: The dependencies given as an expression or an iterable of names.
        info: The validation info used to look up the "dialect" and "default_catalog".

    Returns:
        The set of normalized model names. Values that are neither supported expressions
        nor non-string iterables are returned unchanged.
    """
    data = validation_data(info)
    dialect = data.get("dialect")
    catalog = data.get("default_catalog")

    def _normalize(model: t.Any) -> str:
        return d.normalize_model_name(model, default_catalog=catalog, dialect=dialect)

    if isinstance(v, exp.Paren):
        v = v.unnest()

    if isinstance(v, (exp.Array, exp.Tuple)):
        # String items are normalized by name, the rest are normalized as expressions
        return {_normalize(table.name if table.is_string else table) for table in v.expressions}

    if isinstance(v, (exp.Table, exp.Column)):
        return {_normalize(v)}

    if hasattr(v, "__iter__") and not isinstance(v, str):
        return {_normalize(name) for name in v}

    return v

def sort_python_env( python_env: Dict[str, sqlmesh.utils.metaprogramming.Executable]) -> List[Tuple[str, sqlmesh.utils.metaprogramming.Executable]]: View Source

def sort_python_env(python_env: t.Dict[str, Executable]) -> t.List[t.Tuple[str, Executable]]:
    """Returns the python env items sorted by the executable's kind and then by name."""

    def _sort_key(item: t.Tuple[str, Executable]) -> t.Tuple[t.Any, str]:
        name, executable = item
        return executable.kind, name

    return sorted(python_env.items(), key=_sort_key)

Returns the python env sorted.

def sorted_python_env_payloads( python_env: Dict[str, sqlmesh.utils.metaprogramming.Executable]) -> List[str]: View Source

def sorted_python_env_payloads(python_env: t.Dict[str, Executable]) -> t.List[str]:
    """Returns the payloads of the sorted python env.

    Each payload is prefixed with a comment containing the executable's path when the
    path is set. Imports and definitions are emitted as is, whereas any other
    executable is emitted as an assignment to its name.
    """

    def _render(name: str, executable: Executable) -> str:
        header = f"# {executable.path}\n" if executable.path is not None else ""
        if executable.is_import or executable.is_definition:
            body = executable.payload
        else:
            body = f"{name} = {executable.payload}"
        return header + body

    return [_render(name, executable) for name, executable in sort_python_env(python_env)]

Returns the payloads of the sorted python env.

def parse_strings_with_macro_refs( value: Any, dialect: Union[str, sqlglot.dialects.dialect.Dialect, type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Any: View Source

def parse_strings_with_macro_refs(value: t.Any, dialect: DialectType) -> t.Any:
    """Recursively parses the strings that contain "@" within the given value.

    Dictionaries are updated in place, whereas lists are rebuilt. Any other value,
    including strings without "@", is returned unchanged.
    """
    if isinstance(value, str):
        return exp.maybe_parse(value, dialect=dialect) if "@" in value else value

    if isinstance(value, list):
        return [parse_strings_with_macro_refs(item, dialect) for item in value]

    if isinstance(value, dict):
        # Iterate over a copy, since the dictionary is assigned to while looping
        for key, item in dict(value).items():
            value[key] = parse_strings_with_macro_refs(item, dialect)

    return value

def expression_validator( cls: Type, v: Union[List[str], List[sqlglot.expressions.core.Expr], str, sqlglot.expressions.core.Expr, Callable, NoneType], info: Optional[pydantic_core.core_schema.ValidationInfo]) -> Union[List[sqlglot.expressions.core.Expr], sqlglot.expressions.core.Expr, Callable, NoneType]: View Source

def parse_expression(
    cls: t.Type,
    v: t.Union[t.List[str], t.List[exp.Expr], str, exp.Expr, t.Callable, None],
    info: t.Optional[ValidationInfo],
) -> t.List[exp.Expr] | exp.Expr | t.Callable | None:
    """Helper method to deserialize SQLGlot expressions in Pydantic Models.

    Args:
        cls: The class the validator is attached to. It's not used.
        v: The value to deserialize. None and callables are returned unchanged.
        info: The validation info used to look up the "dialect". An empty dialect
            is used when it's falsy.

    Returns:
        The parsed expression(s), or the value itself if no parsing was needed.

    Raises:
        ConfigError: If the value is falsy and isn't None, a callable, a list or a string.
    """
    if v is None or callable(v):
        return v

    dialect = validation_data(info).get("dialect") if info else ""

    if isinstance(v, str):
        return d.parse_one(v, dialect=dialect)

    if isinstance(v, list):
        parsed: t.List[exp.Expr] = []
        for item in v:
            # Semicolons are dropped from the result
            if isinstance(item, exp.Semicolon):
                continue
            if isinstance(item, exp.Expr):
                parsed.append(item)
            else:
                parsed.append(d.parse_one(item, dialect=dialect))  # type: ignore[misc]
        return parsed

    if not v:
        raise ConfigError(f"Could not parse {v}")

    return v

Helper method to deserialize SQLGlot expressions in Pydantic Models.

def bool_validator(unknown):

Wrap a classmethod, staticmethod, property or unbound function and act as a descriptor that allows us to detect decorated items from the class' attributes.

This class' __get__ returns the wrapped item's __get__ result, which makes it transparent for classmethods and staticmethods.

Attributes:

wrapped: The decorator that has to be wrapped.
decorator_info: The decorator info.
shim: A wrapper function to wrap V1 style function.

def properties_validator( cls: Type, v: Any, info: Optional[pydantic_core.core_schema.ValidationInfo]) -> Optional[sqlglot.expressions.query.Tuple]: View Source

def parse_properties(
    cls: t.Type, v: t.Any, info: t.Optional[ValidationInfo]
) -> t.Optional[exp.Tuple]:
    """Converts properties into a tuple of `<key> = <value>` expressions.

    Args:
        cls: The class the validator is attached to. It's not used.
        v: The properties given as a string, an array / paren / tuple expression
            or a dictionary. None is returned unchanged.
        info: The validation info used to look up the "dialect". An empty dialect
            is used when it's falsy.

    Returns:
        The properties tuple with the dialect stored in its meta, or None.

    Raises:
        ConfigError: If one of the properties isn't an equality expression.
        SQLMeshError: If the value has an unexpected type.
    """
    if v is None:
        return v

    dialect = validation_data(info).get("dialect") if info else ""

    if isinstance(v, str):
        v = d.parse_one(v, dialect=dialect)

    if isinstance(v, dict):
        properties = exp.Tuple(
            expressions=[exp.Literal.string(key).eq(value) for key, value in v.items()]
        )
    elif isinstance(v, (exp.Array, exp.Paren, exp.Tuple)):
        # A paren wraps a single property, whereas arrays and tuples hold a list of them
        if isinstance(v, exp.Paren):
            eq_expressions: t.List[exp.Expr] = [v.unnest()]
        else:
            eq_expressions = v.expressions

        for candidate in eq_expressions:
            if not isinstance(candidate, exp.EQ):
                raise ConfigError(
                    f"Invalid property '{candidate.sql(dialect=dialect)}'. "
                    "Properties must be specified as key-value pairs <key> = <value>. "
                )

        if isinstance(v, (exp.Paren, exp.Array)):
            properties = exp.Tuple(expressions=eq_expressions)
        else:
            properties = v
    else:
        raise SQLMeshError(f"Unexpected properties '{v}'")

    properties.meta["dialect"] = dialect
    return properties

Wrap a classmethod, staticmethod, property or unbound function and act as a descriptor that allows us to detect decorated items from the class' attributes.

This class' __get__ returns the wrapped item's __get__ result, which makes it transparent for classmethods and staticmethods.

Attributes:

wrapped: The decorator that has to be wrapped.
decorator_info: The decorator info.
shim: A wrapper function to wrap V1 style function.

def default_catalog_validator(cls: Type, v: Any) -> Optional[str]: View Source

def default_catalog(cls: t.Type, v: t.Any) -> t.Optional[str]:
    """Returns the string form of the value, or None if the value is None."""
    # If v is an expression then we will return expression as sql without a dialect
    return None if v is None else str(v)

Wrap a classmethod, staticmethod, property or unbound function and act as a descriptor that allows us to detect decorated items from the class' attributes.

This class' __get__ returns the wrapped item's __get__ result, which makes it transparent for classmethods and staticmethods.

Attributes:

wrapped: The decorator that has to be wrapped.
decorator_info: The decorator info.
shim: A wrapper function to wrap V1 style function.

def depends_on_validator( cls: Type, v: Any, info: pydantic_core.core_schema.ValidationInfo) -> Optional[Set[str]]: View Source

def depends_on(cls: t.Type, v: t.Any, info: ValidationInfo) -> t.Optional[t.Set[str]]:
    """Normalizes the given dependencies into a set of model names.

    Args:
        cls: The class the validator is attached to. It's not used.
        v: The dependencies given as an expression or an iterable of names.
        info: The validation info used to look up the "dialect" and "default_catalog".

    Returns:
        The set of normalized model names. Values that are neither supported expressions
        nor non-string iterables are returned unchanged.
    """
    data = validation_data(info)
    dialect = data.get("dialect")
    catalog = data.get("default_catalog")

    def _normalize(model: t.Any) -> str:
        return d.normalize_model_name(model, default_catalog=catalog, dialect=dialect)

    if isinstance(v, exp.Paren):
        v = v.unnest()

    if isinstance(v, (exp.Array, exp.Tuple)):
        # String items are normalized by name, the rest are normalized as expressions
        return {_normalize(table.name if table.is_string else table) for table in v.expressions}

    if isinstance(v, (exp.Table, exp.Column)):
        return {_normalize(v)}

    if hasattr(v, "__iter__") and not isinstance(v, str):
        return {_normalize(name) for name in v}

    return v

Wrap a classmethod, staticmethod, property or unbound function and act as a descriptor that allows us to detect decorated items from the class' attributes.

This class' __get__ returns the wrapped item's __get__ result, which makes it transparent for classmethods and staticmethods.

Attributes:

wrapped: The decorator that has to be wrapped.
decorator_info: The decorator info.
shim: A wrapper function to wrap V1 style function.

class ParsableSql(sqlmesh.utils.pydantic.PydanticModel): View Source

class ParsableSql(PydanticModel):
    """A SQL string paired with a lazily parsed, cached expression."""

    # The SQL text
    sql: str
    transaction: t.Optional[bool] = None

    # The cached result of the last parse and the dialect it was parsed with
    _parsed: t.Optional[exp.Expr] = None
    _parsed_dialect: t.Optional[str] = None

    def parse(self, dialect: str) -> exp.Expr:
        """Returns the parsed expression, re-parsing only if the cache can't be reused.

        Args:
            dialect: The dialect to parse the SQL text with.
        """
        cache_is_usable = self._parsed is not None and self._parsed_dialect == dialect
        if not cache_is_usable:
            self._parsed = d.parse_one(self.sql, dialect=dialect)
            self._parsed_dialect = dialect
        return self._parsed  # type: ignore[return-value]

    @classmethod
    def from_parsed_expression(
        cls, parsed_expression: exp.Expr, dialect: str, use_meta_sql: bool = False
    ) -> ParsableSql:
        """Creates an instance from an expression that has already been parsed.

        Args:
            parsed_expression: The parsed expression to wrap.
            dialect: The dialect used to generate the SQL text and recorded as the
                dialect of the cached parsed expression.
            use_meta_sql: If true, the value stored under the "sql" key of the
                expression's meta is used as the SQL text when it is truthy. Otherwise
                the text is generated from the expression.

        Returns:
            An instance whose parse cache is pre-populated with `parsed_expression`.
        """
        sql_text = parsed_expression.meta.get("sql") if use_meta_sql else None
        if not sql_text:
            sql_text = parsed_expression.sql(dialect=dialect)

        instance = cls(sql=sql_text)
        # Seed the cache so that `parse` with the same dialect doesn't need to re-parse.
        instance._parsed = parsed_expression
        instance._parsed_dialect = dialect
        return instance

    @classmethod
    def validator(cls) -> classmethod:
        """Builds a "before" field validator that coerces raw values into ParsableSql.

        The validator is registered for the `query_`, `expressions_`, `pre_statements_`,
        `post_statements_` and `on_virtual_update_` fields with `check_fields=False`.

        Returns:
            The result of applying `field_validator` to the coercion function.
        """

        def _validate_parsable_sql(
            v: t.Any, info: ValidationInfo
        ) -> t.Optional[t.Union[ParsableSql, t.List[ParsableSql]]]:
            if v is None:
                return None
            if isinstance(v, str):
                return ParsableSql(sql=v)
            if isinstance(v, exp.Expr):
                return ParsableSql.from_parsed_expression(
                    v, get_dialect(info.data), use_meta_sql=False
                )
            if not isinstance(v, list):
                return ParsableSql.parse_obj(v)

            # The dialect is looked up once and shared by all the items of the list.
            dialect = get_dialect(info.data)
            coerced: t.List[ParsableSql] = []
            for item in v:
                if isinstance(item, str):
                    coerced.append(ParsableSql(sql=item))
                elif isinstance(item, exp.Expr):
                    coerced.append(
                        ParsableSql.from_parsed_expression(item, dialect, use_meta_sql=False)
                    )
                else:
                    coerced.append(ParsableSql.parse_obj(item))
            return coerced

        register = field_validator(
            "query_",
            "expressions_",
            "pre_statements_",
            "post_statements_",
            "on_virtual_update_",
            mode="before",
            check_fields=False,
        )
        return register(_validate_parsable_sql)

Usage Documentation: Models

A base class for creating Pydantic models.

Attributes:

__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized __init__ [Signature][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom __init__ function.
__pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces Model.__validators__ and Model.__root_validators__ from Pydantic V1.
__pydantic_generic_metadata__: A dictionary containing metadata about generic Pydantic models. The origin and args items map to the [__origin__][genericalias.__origin__] and [__args__][genericalias.__args__] attributes of [generic aliases][types-genericalias], and the parameter item maps to the __parameter__ attribute of generic classes.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [RootModel][pydantic.root_model.RootModel].
__pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
__pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [FieldInfo][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [ComputedFieldInfo][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [extra][pydantic.config.ConfigDict.extra] is set to 'allow'.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.

sql: str

transaction: Optional[bool]

def parse(self, dialect: str) -> sqlglot.expressions.core.Expr: View Source

678    def parse(self, dialect: str) -> exp.Expr:
679        if self._parsed is None or self._parsed_dialect != dialect:
680            self._parsed = d.parse_one(self.sql, dialect=dialect)
681            self._parsed_dialect = dialect
682        return self._parsed  # type: ignore[return-value]

@classmethod

def from_parsed_expression( cls, parsed_expression: sqlglot.expressions.core.Expr, dialect: str, use_meta_sql: bool = False) -> ParsableSql: View Source

684    @classmethod
685    def from_parsed_expression(
686        cls, parsed_expression: exp.Expr, dialect: str, use_meta_sql: bool = False
687    ) -> ParsableSql:
688        sql = (
689            parsed_expression.meta.get("sql") or parsed_expression.sql(dialect=dialect)
690            if use_meta_sql
691            else parsed_expression.sql(dialect=dialect)
692        )
693        result = cls(sql=sql)
694        result._parsed = parsed_expression
695        result._parsed_dialect = dialect
696        return result

@classmethod

def validator(cls) -> classmethod: View Source

698    @classmethod
699    def validator(cls) -> classmethod:
700        def _validate_parsable_sql(
701            v: t.Any, info: ValidationInfo
702        ) -> t.Optional[t.Union[ParsableSql, t.List[ParsableSql]]]:
703            if v is None:
704                return v
705            if isinstance(v, str):
706                return ParsableSql(sql=v)
707            if isinstance(v, exp.Expr):
708                return ParsableSql.from_parsed_expression(
709                    v, get_dialect(info.data), use_meta_sql=False
710                )
711            if isinstance(v, list):
712                dialect = get_dialect(info.data)
713                return [
714                    ParsableSql(sql=s)
715                    if isinstance(s, str)
716                    else ParsableSql.from_parsed_expression(s, dialect, use_meta_sql=False)
717                    if isinstance(s, exp.Expr)
718                    else ParsableSql.parse_obj(s)
719                    for s in v
720                ]
721            return ParsableSql.parse_obj(v)
722
723        return field_validator(
724            "query_",
725            "expressions_",
726            "pre_statements_",
727            "post_statements_",
728            "on_virtual_update_",
729            mode="before",
730            check_fields=False,
731        )(_validate_parsable_sql)

model_config = {'json_encoders': {<class 'sqlglot.expressions.core.Expr'>: <function _expression_encoder>, <class 'sqlglot.expressions.datatypes.DataType'>: <function _expression_encoder>, <class 'sqlglot.expressions.query.Tuple'>: <function _expression_encoder>, typing.Union[sqlglot.expressions.query.Query, sqlmesh.core.dialect.JinjaQuery]: <function _expression_encoder>, typing.Union[sqlglot.expressions.query.Query, sqlmesh.core.dialect.JinjaQuery, sqlmesh.core.dialect.MacroFunc]: <function _expression_encoder>, <class 'datetime.tzinfo'>: <function PydanticModel.<lambda>>}, 'arbitrary_types_allowed': True, 'extra': 'forbid', 'protected_namespaces': ()}

Configuration for the model; it should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None: View Source

def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Populate ``__pydantic_private__`` with the defaults of the private attributes.

    Written to be used as a BaseModel method; ``context`` is accepted because
    pydantic-core passes it when calling this function, but it is not read here.

    Nothing happens when ``__pydantic_private__`` is already set to a
    non-None value.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is not None:
        return

    private_values = {}
    for attr_name, attr in self.__private_attributes__.items():
        if attr.default_factory_takes_validated_data:
            # Only build the merged dict when the default factory takes validated data.
            validated = {**self.__dict__, **private_values}
            value = attr.get_default(call_default_factory=True, validated_data=validated)
        else:
            value = attr.get_default(call_default_factory=True)
        # Attributes without a default are left out of the mapping.
        if value is not PydanticUndefined:
            private_values[attr_name] = value
    object_setattr(self, '__pydantic_private__', private_values)

This function is meant to behave like a BaseModel method to initialize private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Arguments:

self: The BaseModel instance.
context: The context.

Inherited Members

pydantic.main.BaseModel: BaseModel; model_fields; model_computed_fields; model_extra; model_fields_set; model_construct; model_copy; model_dump; model_dump_json; model_json_schema; model_parametrized_name; model_rebuild; model_validate; model_validate_json; model_validate_strings; parse_file; from_orm; construct; schema; schema_json; validate; update_forward_refs
sqlmesh.utils.pydantic.PydanticModel: dict; json; copy; fields_set; parse_obj; parse_raw; missing_required_fields; extra_fields; all_fields; all_field_infos; required_fields