Edit on GitHub

sqlmesh.core.dialect

View Source

   1from __future__ import annotations
   2
   3import functools
   4import re
   5import sys
   6import typing as t
   7from contextlib import contextmanager
   8from difflib import unified_diff
   9from enum import Enum, auto
  10
  11import pandas as pd
  12from sqlglot import Dialect, Generator, ParseError, Parser, Tokenizer, TokenType, exp
  13from sqlglot.dialects.dialect import DialectType
  14from sqlglot.dialects.snowflake import Snowflake
  15from sqlglot.helper import seq_get
  16from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
  17from sqlglot.optimizer.qualify_columns import quote_identifiers
  18from sqlglot.optimizer.qualify_tables import qualify_tables
  19from sqlglot.optimizer.scope import traverse_scope
  20from sqlglot.schema import MappingSchema
  21from sqlglot.tokens import Token
  22
  23from sqlmesh.core.constants import MAX_MODEL_DEFINITION_SIZE
  24from sqlmesh.utils.errors import SQLMeshError
  25from sqlmesh.utils.pandas import columns_to_types_from_df
  26
  27if t.TYPE_CHECKING:
  28    from sqlglot._typing import E
  29
  30    from sqlmesh.utils.pandas import PandasNamedTuple
  31
  32SQLMESH_MACRO_PREFIX = "@"
  33
  34TABLES_META = "sqlmesh.tables"
  35
  36
  37class Model(exp.Expression):
  38    arg_types = {"expressions": True}
  39
  40
  41class Audit(exp.Expression):
  42    arg_types = {"expressions": True}
  43
  44
  45class Metric(exp.Expression):
  46    arg_types = {"expressions": True}
  47
  48
  49class Jinja(exp.Func):
  50    arg_types = {"this": True}
  51
  52
  53class JinjaQuery(Jinja):
  54    pass
  55
  56
  57class JinjaStatement(Jinja):
  58    pass
  59
  60
  61class ModelKind(exp.Expression):
  62    arg_types = {"this": True, "expressions": False}
  63
  64
  65class MacroVar(exp.Var):
  66    pass
  67
  68
  69class MacroFunc(exp.Func):
  70    @property
  71    def name(self) -> str:
  72        return self.this.name
  73
  74
  75class MacroDef(MacroFunc):
  76    arg_types = {"this": True, "expression": True}
  77
  78
  79class MacroSQL(MacroFunc):
  80    arg_types = {"this": True, "into": False}
  81
  82
  83class MacroStrReplace(MacroFunc):
  84    pass
  85
  86
  87class PythonCode(exp.Expression):
  88    arg_types = {"expressions": True}
  89
  90
  91class DColonCast(exp.Cast):
  92    pass
  93
  94
  95class MetricAgg(exp.AggFunc):
  96    """Used for computing metrics."""
  97
  98    arg_types = {"this": True}
  99
 100    @property
 101    def output_name(self) -> str:
 102        return self.this.name
 103
 104
 105class StagedFilePath(exp.Table):
 106    """Represents paths to "staged files" in Snowflake."""
 107
 108
 109def _parse_statement(self: Parser) -> t.Optional[exp.Expression]:
 110    if self._curr is None:
 111        return None
 112
 113    parser = PARSERS.get(self._curr.text.upper())
 114
 115    if parser:
 116        # Capture any available description in the form of a comment
 117        comments = self._curr.comments
 118
 119        index = self._index
 120        try:
 121            self._advance()
 122            meta = self._parse_wrapped(lambda: t.cast(t.Callable, parser)(self))
 123        except ParseError:
 124            self._retreat(index)
 125
 126        # Only return the DDL expression if we actually managed to parse one. This is
 127        # done in order to allow parsing standalone identifiers / function calls like
 128        # "metric", or "model(1, 2, 3)", which collide with SQLMesh's DDL syntax.
 129        if self._index != index:
 130            meta.comments = comments
 131            return meta
 132
 133    return self.__parse_statement()  # type: ignore
 134
 135
 136def _parse_lambda(self: Parser, alias: bool = False) -> t.Optional[exp.Expression]:
 137    node = self.__parse_lambda(alias=alias)  # type: ignore
 138    if isinstance(node, exp.Lambda):
 139        node.set("this", self._parse_alias(node.this))
 140    return node
 141
 142
 143def _parse_id_var(
 144    self: Parser,
 145    any_token: bool = True,
 146    tokens: t.Optional[t.Collection[TokenType]] = None,
 147) -> t.Optional[exp.Expression]:
 148    if self._prev and self._prev.text == SQLMESH_MACRO_PREFIX and self._match(TokenType.L_BRACE):
 149        identifier = self.__parse_id_var(any_token=any_token, tokens=tokens)  # type: ignore
 150        if not self._match(TokenType.R_BRACE):
 151            self.raise_error("Expecting }")
 152        identifier.args["this"] = f"@{{{identifier.name}}}"
 153    else:
 154        identifier = self.__parse_id_var(any_token=any_token, tokens=tokens)  # type: ignore
 155
 156    while (
 157        identifier
 158        and self._is_connected()
 159        and (
 160            self._match_texts(("{", SQLMESH_MACRO_PREFIX))
 161            or self._curr.token_type not in self.RESERVED_TOKENS
 162        )
 163    ):
 164        this = identifier.name
 165        brace = False
 166
 167        if self._prev.text == "{":
 168            this += "{"
 169            brace = True
 170        else:
 171            if self._prev.text == SQLMESH_MACRO_PREFIX:
 172                this += "@"
 173            if self._match(TokenType.L_BRACE):
 174                this += "{"
 175                brace = True
 176
 177        next_id = self._parse_id_var(any_token=False)
 178
 179        if next_id:
 180            this += next_id.name
 181        else:
 182            return identifier
 183
 184        if brace:
 185            if self._match(TokenType.R_BRACE):
 186                this += "}"
 187            else:
 188                self.raise_error("Expecting }")
 189
 190        identifier = self.expression(exp.Identifier, this=this, quoted=identifier.quoted)
 191
 192    return identifier
 193
 194
 195def _parse_macro(self: Parser, keyword_macro: str = "") -> t.Optional[exp.Expression]:
 196    if self._prev.text != SQLMESH_MACRO_PREFIX:
 197        return self._parse_parameter()
 198
 199    comments = self._prev.comments
 200    index = self._index
 201    field = self._parse_primary() or self._parse_function(functions={}) or self._parse_id_var()
 202
 203    if isinstance(field, exp.Func):
 204        macro_name = field.name.upper()
 205        if macro_name != keyword_macro and macro_name in KEYWORD_MACROS:
 206            self._retreat(index)
 207            return None
 208
 209        if isinstance(field, exp.Anonymous):
 210            if macro_name == "DEF":
 211                return self.expression(
 212                    MacroDef,
 213                    this=field.expressions[0],
 214                    expression=field.expressions[1],
 215                    comments=comments,
 216                )
 217            if macro_name == "SQL":
 218                into = field.expressions[1].this.lower() if len(field.expressions) > 1 else None
 219                return self.expression(
 220                    MacroSQL, this=field.expressions[0], into=into, comments=comments
 221                )
 222        else:
 223            field = self.expression(
 224                exp.Anonymous,
 225                this=field.sql_name(),
 226                expressions=list(field.args.values()),
 227                comments=comments,
 228            )
 229
 230        return self.expression(MacroFunc, this=field, comments=comments)
 231
 232    if field is None:
 233        return None
 234
 235    if field.is_string or (isinstance(field, exp.Identifier) and field.quoted):
 236        return self.expression(
 237            MacroStrReplace, this=exp.Literal.string(field.this), comments=comments
 238        )
 239
 240    if "@" in field.this:
 241        return field
 242    return self.expression(MacroVar, this=field.this, comments=comments)
 243
 244
 245KEYWORD_MACROS = {"WITH", "JOIN", "WHERE", "GROUP_BY", "HAVING", "ORDER_BY", "LIMIT"}
 246
 247
 248def _parse_matching_macro(self: Parser, name: str) -> t.Optional[exp.Expression]:
 249    if not self._match_pair(TokenType.PARAMETER, TokenType.VAR, advance=False) or (
 250        self._next and self._next.text.upper() != name.upper()
 251    ):
 252        return None
 253
 254    self._advance()
 255    return _parse_macro(self, keyword_macro=name)
 256
 257
 258def _parse_body_macro(self: Parser) -> t.Tuple[str, t.Optional[exp.Expression]]:
 259    name = self._next and self._next.text.upper()
 260
 261    if name == "JOIN":
 262        return ("joins", self._parse_join())
 263    if name == "WHERE":
 264        return ("where", self._parse_where())
 265    if name == "GROUP_BY":
 266        return ("group", self._parse_group())
 267    if name == "HAVING":
 268        return ("having", self._parse_having())
 269    if name == "ORDER_BY":
 270        return ("order", self._parse_order())
 271    if name == "LIMIT":
 272        return ("limit", self._parse_limit())
 273    return ("", None)
 274
 275
 276def _parse_with(self: Parser, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
 277    macro = _parse_matching_macro(self, "WITH")
 278    if not macro:
 279        return self.__parse_with(skip_with_token=skip_with_token)  # type: ignore
 280
 281    macro.this.append("expressions", self.__parse_with(skip_with_token=True))  # type: ignore
 282    return macro
 283
 284
 285def _parse_join(
 286    self: Parser, skip_join_token: bool = False, parse_bracket: bool = False
 287) -> t.Optional[exp.Expression]:
 288    index = self._index
 289    method, side, kind = self._parse_join_parts()
 290    macro = _parse_matching_macro(self, "JOIN")
 291    if not macro:
 292        self._retreat(index)
 293        return self.__parse_join(skip_join_token=skip_join_token, parse_bracket=parse_bracket)  # type: ignore
 294
 295    join = self.__parse_join(skip_join_token=True)  # type: ignore
 296    if method:
 297        join.set("method", method.text)
 298    if side:
 299        join.set("side", side.text)
 300    if kind:
 301        join.set("kind", kind.text)
 302
 303    macro.this.append("expressions", join)
 304    return macro
 305
 306
 307def _parse_where(self: Parser, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
 308    macro = _parse_matching_macro(self, "WHERE")
 309    if not macro:
 310        return self.__parse_where(skip_where_token=skip_where_token)  # type: ignore
 311
 312    macro.this.append("expressions", self.__parse_where(skip_where_token=True))  # type: ignore
 313    return macro
 314
 315
 316def _parse_group(self: Parser, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
 317    macro = _parse_matching_macro(self, "GROUP_BY")
 318    if not macro:
 319        return self.__parse_group(skip_group_by_token=skip_group_by_token)  # type: ignore
 320
 321    macro.this.append("expressions", self.__parse_group(skip_group_by_token=True))  # type: ignore
 322    return macro
 323
 324
 325def _parse_having(self: Parser, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
 326    macro = _parse_matching_macro(self, "HAVING")
 327    if not macro:
 328        return self.__parse_having(skip_having_token=skip_having_token)  # type: ignore
 329
 330    macro.this.append("expressions", self.__parse_having(skip_having_token=True))  # type: ignore
 331    return macro
 332
 333
 334def _parse_order(
 335    self: Parser, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
 336) -> t.Optional[exp.Expression]:
 337    macro = _parse_matching_macro(self, "ORDER_BY")
 338    if not macro:
 339        return self.__parse_order(this, skip_order_token=skip_order_token)  # type: ignore
 340
 341    macro.this.append("expressions", self.__parse_order(this, skip_order_token=True))  # type: ignore
 342    return macro
 343
 344
 345def _parse_limit(
 346    self: Parser,
 347    this: t.Optional[exp.Expression] = None,
 348    top: bool = False,
 349    skip_limit_token: bool = False,
 350) -> t.Optional[exp.Expression]:
 351    macro = _parse_matching_macro(self, "TOP" if top else "LIMIT")
 352    if not macro:
 353        return self.__parse_limit(this, top=top, skip_limit_token=skip_limit_token)  # type: ignore
 354
 355    macro.this.append("expressions", self.__parse_limit(this, top=top, skip_limit_token=True))  # type: ignore
 356    return macro
 357
 358
 359def _parse_props(self: Parser) -> t.Optional[exp.Expression]:
 360    key = self._parse_id_var(any_token=True)
 361    if not key:
 362        return None
 363
 364    name = key.name.lower()
 365    if name == "when_matched":
 366        value: t.Optional[exp.Expression] = self._parse_when_matched()[0]
 367    elif self._match(TokenType.L_PAREN):
 368        value = self.expression(exp.Tuple, expressions=self._parse_csv(self._parse_equality))
 369        self._match_r_paren()
 370    else:
 371        value = self._parse_bracket(self._parse_field(any_token=True))
 372
 373    if name == "path" and value:
 374        # Make sure if we get a windows path that it is converted to posix
 375        value = exp.Literal.string(value.this.replace("\\", "/"))
 376
 377    return self.expression(exp.Property, this=name, value=value)
 378
 379
 380def _parse_types(
 381    self: Parser,
 382    check_func: bool = False,
 383    schema: bool = False,
 384    allow_identifiers: bool = True,
 385) -> t.Optional[exp.Expression]:
 386    start = self._curr
 387    parsed_type = self.__parse_types(  # type: ignore
 388        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
 389    )
 390
 391    if schema and parsed_type:
 392        parsed_type.meta["sql"] = self._find_sql(start, self._prev)
 393
 394    return parsed_type
 395
 396
 397# Only needed for Snowflake: its "staged file" syntax (@<path>) clashes with our macro
 398# var syntax. By converting the Var representation to a MacroVar, we should be able to
 399# handle both use cases: if there's no value in the MacroEvaluator's context for that
 400# MacroVar, it'll render into @<path>, so it won't break staged file path references.
 401#
 402# See: https://docs.snowflake.com/en/user-guide/querying-stage
 403def _parse_table_parts(
 404    self: Parser, schema: bool = False, is_db_reference: bool = False
 405) -> exp.Table:
 406    index = self._index
 407    table = self.__parse_table_parts(schema=schema, is_db_reference=is_db_reference)  # type: ignore
 408    table_arg = table.this
 409
 410    if isinstance(table_arg, exp.Var) and table_arg.name.startswith(SQLMESH_MACRO_PREFIX):
 411        # Macro functions do not clash with the staged file syntax, so we can safely parse them
 412        if "(" in table_arg.name:
 413            self._retreat(index)
 414            return Parser._parse_table_parts(self, schema=schema, is_db_reference=is_db_reference)
 415
 416        table_arg.replace(MacroVar(this=table_arg.name[1:]))
 417        return StagedFilePath(**table.args)
 418
 419    return table
 420
 421
 422def _parse_if(self: Parser) -> t.Optional[exp.Expression]:
 423    # If we fail to parse an IF function with expressions as arguments, we then try
 424    # to parse a statement / command to support the macro @IF(condition, statement)
 425    index = self._index
 426    try:
 427        return self.__parse_if()  # type: ignore
 428    except ParseError:
 429        self._retreat(index)
 430        self._match_l_paren()
 431
 432        cond = self._parse_conjunction()
 433        self._match(TokenType.COMMA)
 434
 435        # Try to parse a known statement, otherwise fall back to parsing a command
 436        index = self._index
 437        statement = self._parse_statement()
 438        if isinstance(statement, exp.Command):
 439            self._retreat(index)
 440            statement = self._parse_as_command(self._curr)
 441
 442            # Unconsume the right parenthesis as well as omit it from the command's text
 443            self._retreat(self._index - 1)
 444            statement.set("expression", statement.expression[:-1])
 445
 446        # Return anonymous so that _parse_macro can create a MacroFunc with this value
 447        self._match_r_paren()
 448        return exp.Anonymous(this="IF", expressions=[cond, statement])
 449
 450
 451def _create_parser(parser_type: t.Type[exp.Expression], table_keys: t.List[str]) -> t.Callable:
 452    def parse(self: Parser) -> t.Optional[exp.Expression]:
 453        from sqlmesh.core.model.kind import ModelKindName
 454
 455        expressions: t.List[exp.Expression] = []
 456
 457        while True:
 458            prev_property = seq_get(expressions, -1)
 459            if not self._match(TokenType.COMMA, expression=prev_property) and expressions:
 460                break
 461
 462            key_expression = self._parse_id_var(any_token=True)
 463            if not key_expression:
 464                break
 465
 466            # This allows macro functions that programmaticaly generate the property key-value pair
 467            if isinstance(key_expression, MacroFunc):
 468                expressions.append(key_expression)
 469                continue
 470
 471            key = key_expression.name.lower()
 472
 473            start = self._curr
 474            value: t.Optional[exp.Expression | str]
 475
 476            if key in table_keys:
 477                value = self._parse_table_parts()
 478            elif key == "columns":
 479                value = self._parse_schema()
 480            elif key == "kind":
 481                id_var = self._parse_id_var(any_token=True)
 482                if not id_var:
 483                    value = None
 484                else:
 485                    kind = ModelKindName[id_var.name.upper()]
 486
 487                    if kind in (
 488                        ModelKindName.INCREMENTAL_BY_TIME_RANGE,
 489                        ModelKindName.INCREMENTAL_BY_UNIQUE_KEY,
 490                        ModelKindName.SEED,
 491                        ModelKindName.VIEW,
 492                        ModelKindName.SCD_TYPE_2,
 493                        ModelKindName.SCD_TYPE_2_BY_TIME,
 494                        ModelKindName.SCD_TYPE_2_BY_COLUMN,
 495                    ) and self._match(TokenType.L_PAREN, advance=False):
 496                        props = self._parse_wrapped_csv(functools.partial(_parse_props, self))
 497                    else:
 498                        props = None
 499
 500                    value = self.expression(
 501                        ModelKind,
 502                        this=kind.value,
 503                        expressions=props,
 504                    )
 505            elif key == "expression":
 506                value = self._parse_conjunction()
 507            else:
 508                value = self._parse_bracket(self._parse_field(any_token=True))
 509
 510            if isinstance(value, exp.Expression):
 511                value.meta["sql"] = self._find_sql(start, self._prev)
 512
 513            expressions.append(self.expression(exp.Property, this=key, value=value))
 514
 515        return self.expression(parser_type, expressions=expressions)
 516
 517    return parse
 518
 519
 520PARSERS = {
 521    "MODEL": _create_parser(Model, ["name"]),
 522    "AUDIT": _create_parser(Audit, ["model"]),
 523    "METRIC": _create_parser(Metric, ["name"]),
 524}
 525
 526
 527def _props_sql(self: Generator, expressions: t.List[exp.Expression]) -> str:
 528    props = []
 529    size = len(expressions)
 530
 531    for i, prop in enumerate(expressions):
 532        sql = self.indent(f"{prop.name} {self.sql(prop, 'value')}")
 533
 534        if i < size - 1:
 535            sql += ","
 536        props.append(self.maybe_comment(sql, expression=prop))
 537
 538    return "\n".join(props)
 539
 540
 541def _sqlmesh_ddl_sql(self: Generator, expression: Model | Audit | Metric, name: str) -> str:
 542    return "\n".join([f"{name} (", _props_sql(self, expression.expressions), ")"])
 543
 544
 545def _model_kind_sql(self: Generator, expression: ModelKind) -> str:
 546    props = _props_sql(self, expression.expressions)
 547    if props:
 548        return "\n".join([f"{expression.this} (", props, ")"])
 549    return expression.name.upper()
 550
 551
 552def _macro_keyword_func_sql(self: Generator, expression: exp.Expression) -> str:
 553    name = expression.name
 554    keyword = name.replace("_", " ")
 555    *args, clause = expression.expressions
 556    macro = f"@{name}({self.format_args(*args)})"
 557    return self.sql(clause).replace(keyword, macro, 1)
 558
 559
 560def _macro_func_sql(self: Generator, expression: MacroFunc) -> str:
 561    expression = expression.this
 562    name = expression.name
 563    if name in KEYWORD_MACROS:
 564        return _macro_keyword_func_sql(self, expression)
 565    return f"@{name}({self.format_args(*expression.expressions)})"
 566
 567
 568def _override(klass: t.Type[Tokenizer | Parser], func: t.Callable) -> None:
 569    name = func.__name__
 570    setattr(klass, f"_{name}", getattr(klass, name))
 571    setattr(klass, name, func)
 572
 573
 574def format_model_expressions(
 575    expressions: t.List[exp.Expression], dialect: t.Optional[str] = None, **kwargs: t.Any
 576) -> str:
 577    """Format a model's expressions into a standardized format.
 578
 579    Args:
 580        expressions: The model's expressions, must be at least model def + query.
 581        dialect: The dialect to render the expressions as.
 582        **kwargs: Additional keyword arguments to pass to the sql generator.
 583
 584    Returns:
 585        A string representing the formatted model.
 586    """
 587    if len(expressions) == 1:
 588        return expressions[0].sql(pretty=True, dialect=dialect)
 589
 590    *statements, query = expressions
 591
 592    def cast_to_colon(node: exp.Expression) -> exp.Expression:
 593        if isinstance(node, exp.Cast) and not any(
 594            # Only convert CAST into :: if it doesn't have additional args set, otherwise this
 595            # conversion could alter the semantics (eg. changing SAFE_CAST in BigQuery to CAST)
 596            arg
 597            for name, arg in node.args.items()
 598            if name not in ("this", "to")
 599        ):
 600            this = node.this
 601
 602            if not isinstance(this, (exp.Binary, exp.Unary)) or isinstance(this, exp.Paren):
 603                cast = DColonCast(this=this, to=node.to)
 604                cast.comments = node.comments
 605                node = cast
 606
 607        exp.replace_children(node, cast_to_colon)
 608        return node
 609
 610    query = query.copy()
 611    exp.replace_children(query, cast_to_colon)
 612
 613    return ";\n\n".join(
 614        [
 615            *(statement.sql(pretty=True, dialect=dialect, **kwargs) for statement in statements),
 616            query.sql(pretty=True, dialect=dialect, **kwargs),
 617        ]
 618    ).strip()
 619
 620
 621def text_diff(
 622    a: t.List[exp.Expression],
 623    b: t.List[exp.Expression],
 624    a_dialect: t.Optional[str] = None,
 625    b_dialect: t.Optional[str] = None,
 626) -> str:
 627    """Find the unified text diff between two expressions."""
 628    a_sql = [
 629        line
 630        for expr in a
 631        for line in expr.sql(pretty=True, comments=False, dialect=a_dialect).split("\n")
 632    ]
 633    b_sql = [
 634        line
 635        for expr in b
 636        for line in expr.sql(pretty=True, comments=False, dialect=b_dialect).split("\n")
 637    ]
 638    return "\n".join(unified_diff(a_sql, b_sql))
 639
 640
 641DIALECT_PATTERN = re.compile(
 642    r"(model|audit).*?\(.*?dialect[^a-z,]+([a-z]*|,)", re.IGNORECASE | re.DOTALL
 643)
 644
 645
 646def _is_command_statement(command: str, tokens: t.List[Token], pos: int) -> bool:
 647    try:
 648        return (
 649            tokens[pos].text.upper() == command.upper()
 650            and tokens[pos + 1].token_type == TokenType.SEMICOLON
 651        )
 652    except IndexError:
 653        return False
 654
 655
 656JINJA_QUERY_BEGIN = "JINJA_QUERY_BEGIN"
 657JINJA_STATEMENT_BEGIN = "JINJA_STATEMENT_BEGIN"
 658JINJA_END = "JINJA_END"
 659
 660
 661def _is_jinja_statement_begin(tokens: t.List[Token], pos: int) -> bool:
 662    return _is_command_statement(JINJA_STATEMENT_BEGIN, tokens, pos)
 663
 664
 665def _is_jinja_query_begin(tokens: t.List[Token], pos: int) -> bool:
 666    return _is_command_statement(JINJA_QUERY_BEGIN, tokens, pos)
 667
 668
 669def _is_jinja_end(tokens: t.List[Token], pos: int) -> bool:
 670    return _is_command_statement(JINJA_END, tokens, pos)
 671
 672
 673def jinja_query(query: str) -> JinjaQuery:
 674    return JinjaQuery(this=exp.Literal.string(query.strip()))
 675
 676
 677def jinja_statement(statement: str) -> JinjaStatement:
 678    return JinjaStatement(this=exp.Literal.string(statement.strip()))
 679
 680
 681class ChunkType(Enum):
 682    JINJA_QUERY = auto()
 683    JINJA_STATEMENT = auto()
 684    SQL = auto()
 685
 686
 687def parse_one(
 688    sql: str, dialect: t.Optional[str] = None, into: t.Optional[exp.IntoType] = None
 689) -> exp.Expression:
 690    expressions = parse(sql, default_dialect=dialect, match_dialect=False, into=into)
 691    if not expressions:
 692        raise SQLMeshError(f"No expressions found in '{sql}'")
 693    elif len(expressions) > 1:
 694        raise SQLMeshError(f"Multiple expressions found in '{sql}'")
 695    return expressions[0]
 696
 697
 698def parse(
 699    sql: str,
 700    default_dialect: t.Optional[str] = None,
 701    match_dialect: bool = True,
 702    into: t.Optional[exp.IntoType] = None,
 703) -> t.List[exp.Expression]:
 704    """Parse a sql string.
 705
 706    Supports parsing model definition.
 707    If a jinja block is detected, the query is stored as raw string in a Jinja node.
 708
 709    Args:
 710        sql: The sql based definition.
 711        default_dialect: The dialect to use if the model does not specify one.
 712
 713    Returns:
 714        A list of the parsed expressions: [Model, *Statements, Query, *Statements]
 715    """
 716    match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE])
 717    dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect)
 718
 719    tokens = dialect.tokenizer.tokenize(sql)
 720    chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)]
 721    total = len(tokens)
 722
 723    pos = 0
 724    while pos < total:
 725        token = tokens[pos]
 726        if _is_jinja_end(tokens, pos) or (
 727            chunks[-1][1] == ChunkType.SQL
 728            and token.token_type == TokenType.SEMICOLON
 729            and pos < total - 1
 730        ):
 731            if token.token_type == TokenType.SEMICOLON:
 732                pos += 1
 733            else:
 734                # Jinja end statement
 735                chunks[-1][0].append(token)
 736                pos += 2
 737            chunks.append(([], ChunkType.SQL))
 738        elif _is_jinja_query_begin(tokens, pos):
 739            chunks.append(([token], ChunkType.JINJA_QUERY))
 740            pos += 2
 741        elif _is_jinja_statement_begin(tokens, pos):
 742            chunks.append(([token], ChunkType.JINJA_STATEMENT))
 743            pos += 2
 744        else:
 745            chunks[-1][0].append(token)
 746            pos += 1
 747
 748    parser = dialect.parser()
 749    expressions: t.List[exp.Expression] = []
 750
 751    for chunk, chunk_type in chunks:
 752        if chunk_type == ChunkType.SQL:
 753            parsed_expressions: t.List[t.Optional[exp.Expression]] = (
 754                parser.parse(chunk, sql) if into is None else parser.parse_into(into, chunk, sql)
 755            )
 756            for expression in parsed_expressions:
 757                if expression:
 758                    expression.meta["sql"] = parser._find_sql(chunk[0], chunk[-1])
 759                    expressions.append(expression)
 760        else:
 761            start, *_, end = chunk
 762            segment = sql[start.end + 2 : end.start - 1]
 763            factory = jinja_query if chunk_type == ChunkType.JINJA_QUERY else jinja_statement
 764            expression = factory(segment.strip())
 765            expression.meta["sql"] = sql[start.start : end.end + 1]
 766            expressions.append(expression)
 767
 768    return expressions
 769
 770
 771def extend_sqlglot() -> None:
 772    """Extend SQLGlot with SQLMesh's custom macro aware dialect."""
 773    tokenizers = {Tokenizer}
 774    parsers = {Parser}
 775    generators = {Generator}
 776
 777    for dialect in Dialect.classes.values():
 778        if hasattr(dialect, "Tokenizer"):
 779            tokenizers.add(dialect.Tokenizer)
 780        if hasattr(dialect, "Parser"):
 781            parsers.add(dialect.Parser)
 782        if hasattr(dialect, "Generator"):
 783            generators.add(dialect.Generator)
 784
 785    for tokenizer in tokenizers:
 786        tokenizer.VAR_SINGLE_TOKENS.update(SQLMESH_MACRO_PREFIX)
 787
 788    for parser in parsers:
 789        parser.FUNCTIONS.update({"JINJA": Jinja.from_arg_list, "METRIC": MetricAgg.from_arg_list})
 790        parser.PLACEHOLDER_PARSERS.update({TokenType.PARAMETER: _parse_macro})
 791        parser.QUERY_MODIFIER_PARSERS.update(
 792            {TokenType.PARAMETER: lambda self: _parse_body_macro(self)}
 793        )
 794
 795    for generator in generators:
 796        if MacroFunc not in generator.TRANSFORMS:
 797            generator.TRANSFORMS.update(
 798                {
 799                    Audit: lambda self, e: _sqlmesh_ddl_sql(self, e, "Audit"),
 800                    DColonCast: lambda self, e: f"{self.sql(e, 'this')}::{self.sql(e, 'to')}",
 801                    Jinja: lambda self, e: e.name,
 802                    JinjaQuery: lambda self, e: f"{JINJA_QUERY_BEGIN};\n{e.name}\n{JINJA_END};",
 803                    JinjaStatement: lambda self,
 804                    e: f"{JINJA_STATEMENT_BEGIN};\n{e.name}\n{JINJA_END};",
 805                    MacroDef: lambda self, e: f"@DEF({self.sql(e.this)}, {self.sql(e.expression)})",
 806                    MacroFunc: _macro_func_sql,
 807                    MacroStrReplace: lambda self, e: f"@{self.sql(e.this)}",
 808                    MacroSQL: lambda self, e: f"@SQL({self.sql(e.this)})",
 809                    MacroVar: lambda self, e: f"@{e.name}",
 810                    Metric: lambda self, e: _sqlmesh_ddl_sql(self, e, "METRIC"),
 811                    Model: lambda self, e: _sqlmesh_ddl_sql(self, e, "MODEL"),
 812                    ModelKind: _model_kind_sql,
 813                    PythonCode: lambda self, e: self.expressions(e, sep="\n", indent=False),
 814                    StagedFilePath: lambda self, e: self.table_sql(e),
 815                }
 816            )
 817
 818            generator.WITH_SEPARATED_COMMENTS = (
 819                *generator.WITH_SEPARATED_COMMENTS,
 820                Model,
 821                MacroDef,
 822            )
 823
 824    _override(Parser, _parse_statement)
 825    _override(Parser, _parse_join)
 826    _override(Parser, _parse_order)
 827    _override(Parser, _parse_where)
 828    _override(Parser, _parse_group)
 829    _override(Parser, _parse_with)
 830    _override(Parser, _parse_having)
 831    _override(Parser, _parse_limit)
 832    _override(Parser, _parse_lambda)
 833    _override(Parser, _parse_types)
 834    _override(Parser, _parse_if)
 835    _override(Parser, _parse_id_var)
 836    _override(Snowflake.Parser, _parse_table_parts)
 837
 838
 839def select_from_values(
 840    values: t.List[PandasNamedTuple],
 841    columns_to_types: t.Dict[str, exp.DataType],
 842    batch_size: int = 0,
 843    alias: str = "t",
 844) -> t.Iterator[exp.Select]:
 845    """Generate a VALUES expression that has a select wrapped around it to cast the values to their correct types.
 846
 847    Args:
 848        values: List of values to use for the VALUES expression.
 849        columns_to_types: Mapping of column names to types to assign to the values.
 850        batch_size: The maximum number of tuples per batches. Defaults to sys.maxsize if <= 0.
 851        alias: The alias to assign to the values expression. If not provided then will default to "t"
 852
 853    Returns:
 854        This method operates as a generator and yields a VALUES expression.
 855    """
 856    if batch_size <= 0:
 857        batch_size = sys.maxsize
 858    num_rows = len(values)
 859    for i in range(0, num_rows, batch_size):
 860        yield select_from_values_for_batch_range(
 861            values=values,
 862            columns_to_types=columns_to_types,
 863            batch_start=i,
 864            batch_end=min(i + batch_size, num_rows),
 865            alias=alias,
 866        )
 867
 868
 869def select_from_values_for_batch_range(
 870    values: t.List[PandasNamedTuple],
 871    columns_to_types: t.Dict[str, exp.DataType],
 872    batch_start: int,
 873    batch_end: int,
 874    alias: str = "t",
 875) -> exp.Select:
 876    casted_columns = [
 877        exp.alias_(exp.cast(column, to=kind), column, copy=False)
 878        for column, kind in columns_to_types.items()
 879    ]
 880
 881    if not values:
 882        # Ensures we don't generate an empty VALUES clause & forces a zero-row output
 883        where = exp.false()
 884        expressions = [tuple(exp.cast(exp.null(), to=kind) for kind in columns_to_types.values())]
 885    else:
 886        where = None
 887        expressions = [
 888            tuple(transform_values(v, columns_to_types)) for v in values[batch_start:batch_end]
 889        ]
 890
 891    values_exp = exp.values(expressions, alias=alias, columns=columns_to_types)
 892    return exp.select(*casted_columns).from_(values_exp, copy=False).where(where, copy=False)
 893
 894
 895def pandas_to_sql(
 896    df: pd.DataFrame,
 897    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
 898    batch_size: int = 0,
 899    alias: str = "t",
 900) -> t.Iterator[exp.Select]:
 901    """Convert a pandas dataframe into a VALUES sql statement.
 902
 903    Args:
 904        df: A pandas dataframe to convert.
 905        columns_to_types: Mapping of column names to types to assign to the values.
 906        batch_size: The maximum number of tuples per batches. Defaults to sys.maxsize if <= 0.
 907        alias: The alias to assign to the values expression. If not provided then will default to "t"
 908
 909    Returns:
 910        This method operates as a generator and yields a VALUES expression.
 911    """
 912    yield from select_from_values(
 913        values=list(df.itertuples(index=False, name=None)),
 914        columns_to_types=columns_to_types or columns_to_types_from_df(df),
 915        batch_size=batch_size,
 916        alias=alias,
 917    )
 918
 919
 920def set_default_catalog(
 921    table: str | exp.Table,
 922    default_catalog: t.Optional[str],
 923) -> exp.Table:
 924    table = exp.to_table(table)
 925
 926    if default_catalog and not table.catalog and table.db:
 927        table.set("catalog", exp.parse_identifier(default_catalog))
 928
 929    return table
 930
 931
 932def normalize_model_name(
 933    table: str | exp.Table | exp.Column,
 934    default_catalog: t.Optional[str],
 935    dialect: DialectType = None,
 936) -> str:
 937    if isinstance(table, exp.Column):
 938        table = exp.table_(table.this, db=table.args.get("table"), catalog=table.args.get("db"))
 939    else:
 940        # We are relying on sqlglot's flexible parsing here to accept quotes from other dialects.
 941        # Ex: I have a a normalized name of '"my_table"' but the dialect is spark and therefore we should
 942        # expect spark quotes to be backticks ('`') instead of double quotes ('"'). sqlglot today is flexible
 943        # and will still parse this correctly and we rely on that.
 944        table = exp.to_table(table, dialect=dialect)
 945
 946    table = set_default_catalog(table, default_catalog)
 947    # An alternative way to do this is the following: exp.table_name(table, dialect=dialect, identify=True)
 948    # This though would result in the names being normalized to the target dialect AND the quotes while the below
 949    # approach just normalizes the names.
 950    # By just normalizing names and using sqlglot dialect for quotes this makes it easier for dialects that have
 951    # compatible normalization strategies but incompatible quoting to still work together without user hassle
 952    return exp.table_name(normalize_identifiers(table, dialect=dialect), identify=True)
 953
 954
 955def find_tables(
 956    expression: exp.Expression, default_catalog: t.Optional[str], dialect: DialectType = None
 957) -> t.Set[str]:
 958    """Find all tables referenced in a query.
 959
 960    Caches the result in the meta field 'tables'.
 961
 962    Args:
 963        expressions: The query to find the tables in.
 964        dialect: The dialect to use for normalization of table names.
 965
 966    Returns:
 967        A Set of all the table names.
 968    """
 969    if TABLES_META not in expression.meta:
 970        expression.meta[TABLES_META] = {
 971            normalize_model_name(table, default_catalog=default_catalog, dialect=dialect)
 972            for scope in traverse_scope(expression)
 973            for table in scope.tables
 974            if table.name and table.name not in scope.cte_sources
 975        }
 976    return expression.meta[TABLES_META]
 977
 978
 979def add_table(node: exp.Expression, table: str) -> exp.Expression:
 980    """Add a table to all columns in an expression."""
 981
 982    def _transform(node: exp.Expression) -> exp.Expression:
 983        if isinstance(node, exp.Column) and not node.table:
 984            return exp.column(node.this, table=table)
 985        if isinstance(node, exp.Identifier):
 986            return exp.column(node, table=table)
 987        return node
 988
 989    return node.transform(_transform)
 990
 991
 992def transform_values(
 993    values: t.Tuple[t.Any, ...], columns_to_types: t.Dict[str, exp.DataType]
 994) -> t.Iterator[t.Any]:
 995    """Perform transformations on values given columns_to_types."""
 996    for value, col_type in zip(values, columns_to_types.values()):
 997        if col_type.is_type(exp.DataType.Type.JSON):
 998            yield exp.func("PARSE_JSON", f"'{value}'")
 999        elif isinstance(value, dict) and col_type.is_type(*exp.DataType.STRUCT_TYPES):
1000            yield _dict_to_struct(value)
1001        else:
1002            yield value
1003
1004
1005def to_schema(sql_path: str | exp.Table) -> exp.Table:
1006    if isinstance(sql_path, exp.Table) and sql_path.this is None:
1007        return sql_path
1008    table = exp.to_table(sql_path.copy() if isinstance(sql_path, exp.Table) else sql_path)
1009    table.set("catalog", table.args.get("db"))
1010    table.set("db", table.args.get("this"))
1011    table.set("this", None)
1012    return table
1013
1014
1015def schema_(
1016    db: exp.Identifier | str,
1017    catalog: t.Optional[exp.Identifier | str] = None,
1018    quoted: t.Optional[bool] = None,
1019) -> exp.Table:
1020    """Build a Schema.
1021
1022    Args:
1023        db: Database name.
1024        catalog: Catalog name.
1025        quoted: Whether to force quotes on the schema's identifiers.
1026
1027    Returns:
1028        The new Schema instance.
1029    """
1030    return exp.Table(
1031        this=None,
1032        db=exp.to_identifier(db, quoted=quoted) if db else None,
1033        catalog=exp.to_identifier(catalog, quoted=quoted) if catalog else None,
1034    )
1035
1036
1037def normalize_mapping_schema(schema: t.Dict, dialect: DialectType) -> MappingSchema:
1038    return MappingSchema(_unquote_schema(schema), dialect=dialect, normalize=False)
1039
1040
1041def _unquote_schema(schema: t.Dict) -> t.Dict:
1042    """SQLGlot schema expects unquoted normalized keys."""
1043    return {
1044        k.strip('"'): _unquote_schema(v) if isinstance(v, dict) else v for k, v in schema.items()
1045    }
1046
1047
1048def _dict_to_struct(values: t.Dict) -> exp.Struct:
1049    expressions = []
1050    for key, value in values.items():
1051        key = exp.to_identifier(key)
1052        value = _dict_to_struct(value) if isinstance(value, dict) else exp.convert(value)
1053        expressions.append(exp.PropertyEQ(this=key, expression=value))
1054
1055    return exp.Struct(expressions=expressions)
1056
1057
1058@contextmanager
1059def normalize_and_quote(
1060    query: E, dialect: str, default_catalog: t.Optional[str], quote: bool = True
1061) -> t.Iterator[E]:
1062    qualify_tables(query, catalog=default_catalog, dialect=dialect)
1063    normalize_identifiers(query, dialect=dialect)
1064    yield query
1065    if quote:
1066        quote_identifiers(query, dialect=dialect)

class Model(sqlglot.expressions.Expression): View Source

38class Model(exp.Expression):
39    arg_types = {"expressions": True}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc

class Audit(sqlglot.expressions.Expression): View Source

42class Audit(exp.Expression):
43    arg_types = {"expressions": True}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc

class Metric(sqlglot.expressions.Expression): View Source

46class Metric(exp.Expression):
47    arg_types = {"expressions": True}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc

class Jinja(sqlglot.expressions.Func): View Source

50class Jinja(exp.Func):
51    arg_types = {"this": True}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class JinjaQuery(Jinja): View Source

54class JinjaQuery(Jinja):
55    pass

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class JinjaStatement(Jinja): View Source

58class JinjaStatement(Jinja):
59    pass

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class ModelKind(sqlglot.expressions.Expression): View Source

62class ModelKind(exp.Expression):
63    arg_types = {"this": True, "expressions": False}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc

class MacroVar(sqlglot.expressions.Var): View Source

66class MacroVar(exp.Var):
67    pass

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc

class MacroFunc(sqlglot.expressions.Func): View Source

70class MacroFunc(exp.Func):
71    @property
72    def name(self) -> str:
73        return self.this.name

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class MacroDef(MacroFunc): View Source

76class MacroDef(MacroFunc):
77    arg_types = {"this": True, "expression": True}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class MacroSQL(MacroFunc): View Source

80class MacroSQL(MacroFunc):
81    arg_types = {"this": True, "into": False}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class MacroStrReplace(MacroFunc): View Source

84class MacroStrReplace(MacroFunc):
85    pass

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class PythonCode(sqlglot.expressions.Expression): View Source

88class PythonCode(exp.Expression):
89    arg_types = {"expressions": True}

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc

class DColonCast(sqlglot.expressions.Cast): View Source

92class DColonCast(exp.Cast):
93    pass

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Cast: output_name; is_type
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class MetricAgg(sqlglot.expressions.AggFunc): View Source

 96class MetricAgg(exp.AggFunc):
 97    """Used for computing metrics."""
 98
 99    arg_types = {"this": True}
100
101    @property
102    def output_name(self) -> str:
103        return self.this.name

Used for computing metrics.

output_name: str

Name of the output column if this expression is a selection.

If the Expression has no output name, an empty string is returned.

Example:

>>> from sqlglot import parse_one
>>> parse_one("SELECT a").expressions[0].output_name
'a'
>>> parse_one("SELECT b AS c").expressions[0].output_name
'c'
>>> parse_one("SELECT 1 + 2").expressions[0].output_name
''

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Func: from_arg_list; sql_names; sql_name; default_parser_mappings

class StagedFilePath(sqlglot.expressions.Table): View Source

106class StagedFilePath(exp.Table):
107    """Represents paths to "staged files" in Snowflake."""

Represents paths to "staged files" in Snowflake.

Inherited Members

sqlglot.expressions.Expression: Expression; this; expression; expressions; text; is_string; is_number; is_negative; is_int; is_star; alias; output_name; is_type; is_leaf; copy; add_comments; pop_comments; append; set; depth; iter_expressions; find; find_all; find_ancestor; parent_select; same_parent; root; walk; dfs; bfs; unnest; unalias; unnest_operands; flatten; to_s; sql; transform; replace; pop; assert_is; error_messages; dump; load; and_; or_; not_; as_; isin; between; is_; like; ilike; eq; neq; rlike; div; asc; desc
sqlglot.expressions.Table: parts; to_column

def format_model_expressions( expressions: List[sqlglot.expressions.Expression], dialect: Union[str, NoneType] = None, **kwargs: Any) -> str: View Source

575def format_model_expressions(
576    expressions: t.List[exp.Expression], dialect: t.Optional[str] = None, **kwargs: t.Any
577) -> str:
578    """Format a model's expressions into a standardized format.
579
580    Args:
581        expressions: The model's expressions, must be at least model def + query.
582        dialect: The dialect to render the expressions as.
583        **kwargs: Additional keyword arguments to pass to the sql generator.
584
585    Returns:
586        A string representing the formatted model.
587    """
588    if len(expressions) == 1:
589        return expressions[0].sql(pretty=True, dialect=dialect)
590
591    *statements, query = expressions
592
593    def cast_to_colon(node: exp.Expression) -> exp.Expression:
594        if isinstance(node, exp.Cast) and not any(
595            # Only convert CAST into :: if it doesn't have additional args set, otherwise this
596            # conversion could alter the semantics (eg. changing SAFE_CAST in BigQuery to CAST)
597            arg
598            for name, arg in node.args.items()
599            if name not in ("this", "to")
600        ):
601            this = node.this
602
603            if not isinstance(this, (exp.Binary, exp.Unary)) or isinstance(this, exp.Paren):
604                cast = DColonCast(this=this, to=node.to)
605                cast.comments = node.comments
606                node = cast
607
608        exp.replace_children(node, cast_to_colon)
609        return node
610
611    query = query.copy()
612    exp.replace_children(query, cast_to_colon)
613
614    return ";\n\n".join(
615        [
616            *(statement.sql(pretty=True, dialect=dialect, **kwargs) for statement in statements),
617            query.sql(pretty=True, dialect=dialect, **kwargs),
618        ]
619    ).strip()

Format a model's expressions into a standardized format.

Arguments:

expressions: The model's expressions, must be at least model def + query.
dialect: The dialect to render the expressions as.
**kwargs: Additional keyword arguments to pass to the sql generator.

Returns:

A string representing the formatted model.

def text_diff( a: List[sqlglot.expressions.Expression], b: List[sqlglot.expressions.Expression], a_dialect: Union[str, NoneType] = None, b_dialect: Union[str, NoneType] = None) -> str: View Source

622def text_diff(
623    a: t.List[exp.Expression],
624    b: t.List[exp.Expression],
625    a_dialect: t.Optional[str] = None,
626    b_dialect: t.Optional[str] = None,
627) -> str:
628    """Find the unified text diff between two expressions."""
629    a_sql = [
630        line
631        for expr in a
632        for line in expr.sql(pretty=True, comments=False, dialect=a_dialect).split("\n")
633    ]
634    b_sql = [
635        line
636        for expr in b
637        for line in expr.sql(pretty=True, comments=False, dialect=b_dialect).split("\n")
638    ]
639    return "\n".join(unified_diff(a_sql, b_sql))

Find the unified text diff between two expressions.

def jinja_query(query: str) -> sqlmesh.core.dialect.JinjaQuery: View Source

674def jinja_query(query: str) -> JinjaQuery:
675    return JinjaQuery(this=exp.Literal.string(query.strip()))

def jinja_statement(statement: str) -> sqlmesh.core.dialect.JinjaStatement: View Source

678def jinja_statement(statement: str) -> JinjaStatement:
679    return JinjaStatement(this=exp.Literal.string(statement.strip()))

class ChunkType(enum.Enum): View Source

682class ChunkType(Enum):
683    JINJA_QUERY = auto()
684    JINJA_STATEMENT = auto()
685    SQL = auto()

An enumeration.

JINJA_QUERY = <ChunkType.JINJA_QUERY: 1>

JINJA_STATEMENT = <ChunkType.JINJA_STATEMENT: 2>

SQL = <ChunkType.SQL: 3>

Inherited Members

enum.Enum: name; value

def parse_one( sql: str, dialect: Union[str, NoneType] = None, into: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]], NoneType] = None) -> sqlglot.expressions.Expression: View Source

688def parse_one(
689    sql: str, dialect: t.Optional[str] = None, into: t.Optional[exp.IntoType] = None
690) -> exp.Expression:
691    expressions = parse(sql, default_dialect=dialect, match_dialect=False, into=into)
692    if not expressions:
693        raise SQLMeshError(f"No expressions found in '{sql}'")
694    elif len(expressions) > 1:
695        raise SQLMeshError(f"Multiple expressions found in '{sql}'")
696    return expressions[0]

def parse( sql: str, default_dialect: Union[str, NoneType] = None, match_dialect: bool = True, into: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]], NoneType] = None) -> List[sqlglot.expressions.Expression]: View Source

699def parse(
700    sql: str,
701    default_dialect: t.Optional[str] = None,
702    match_dialect: bool = True,
703    into: t.Optional[exp.IntoType] = None,
704) -> t.List[exp.Expression]:
705    """Parse a sql string.
706
707    Supports parsing model definition.
708    If a jinja block is detected, the query is stored as raw string in a Jinja node.
709
710    Args:
711        sql: The sql based definition.
712        default_dialect: The dialect to use if the model does not specify one.
713
714    Returns:
715        A list of the parsed expressions: [Model, *Statements, Query, *Statements]
716    """
717    match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE])
718    dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect)
719
720    tokens = dialect.tokenizer.tokenize(sql)
721    chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)]
722    total = len(tokens)
723
724    pos = 0
725    while pos < total:
726        token = tokens[pos]
727        if _is_jinja_end(tokens, pos) or (
728            chunks[-1][1] == ChunkType.SQL
729            and token.token_type == TokenType.SEMICOLON
730            and pos < total - 1
731        ):
732            if token.token_type == TokenType.SEMICOLON:
733                pos += 1
734            else:
735                # Jinja end statement
736                chunks[-1][0].append(token)
737                pos += 2
738            chunks.append(([], ChunkType.SQL))
739        elif _is_jinja_query_begin(tokens, pos):
740            chunks.append(([token], ChunkType.JINJA_QUERY))
741            pos += 2
742        elif _is_jinja_statement_begin(tokens, pos):
743            chunks.append(([token], ChunkType.JINJA_STATEMENT))
744            pos += 2
745        else:
746            chunks[-1][0].append(token)
747            pos += 1
748
749    parser = dialect.parser()
750    expressions: t.List[exp.Expression] = []
751
752    for chunk, chunk_type in chunks:
753        if chunk_type == ChunkType.SQL:
754            parsed_expressions: t.List[t.Optional[exp.Expression]] = (
755                parser.parse(chunk, sql) if into is None else parser.parse_into(into, chunk, sql)
756            )
757            for expression in parsed_expressions:
758                if expression:
759                    expression.meta["sql"] = parser._find_sql(chunk[0], chunk[-1])
760                    expressions.append(expression)
761        else:
762            start, *_, end = chunk
763            segment = sql[start.end + 2 : end.start - 1]
764            factory = jinja_query if chunk_type == ChunkType.JINJA_QUERY else jinja_statement
765            expression = factory(segment.strip())
766            expression.meta["sql"] = sql[start.start : end.end + 1]
767            expressions.append(expression)
768
769    return expressions

Parse a sql string.

Supports parsing model definition. If a jinja block is detected, the query is stored as raw string in a Jinja node.

Arguments:

sql: The sql based definition.
default_dialect: The dialect to use if the model does not specify one.

Returns:

A list of the parsed expressions: [Model, *Statements, Query, *Statements]

def extend_sqlglot() -> None: View Source

772def extend_sqlglot() -> None:
773    """Extend SQLGlot with SQLMesh's custom macro aware dialect."""
774    tokenizers = {Tokenizer}
775    parsers = {Parser}
776    generators = {Generator}
777
778    for dialect in Dialect.classes.values():
779        if hasattr(dialect, "Tokenizer"):
780            tokenizers.add(dialect.Tokenizer)
781        if hasattr(dialect, "Parser"):
782            parsers.add(dialect.Parser)
783        if hasattr(dialect, "Generator"):
784            generators.add(dialect.Generator)
785
786    for tokenizer in tokenizers:
787        tokenizer.VAR_SINGLE_TOKENS.update(SQLMESH_MACRO_PREFIX)
788
789    for parser in parsers:
790        parser.FUNCTIONS.update({"JINJA": Jinja.from_arg_list, "METRIC": MetricAgg.from_arg_list})
791        parser.PLACEHOLDER_PARSERS.update({TokenType.PARAMETER: _parse_macro})
792        parser.QUERY_MODIFIER_PARSERS.update(
793            {TokenType.PARAMETER: lambda self: _parse_body_macro(self)}
794        )
795
796    for generator in generators:
797        if MacroFunc not in generator.TRANSFORMS:
798            generator.TRANSFORMS.update(
799                {
800                    Audit: lambda self, e: _sqlmesh_ddl_sql(self, e, "Audit"),
801                    DColonCast: lambda self, e: f"{self.sql(e, 'this')}::{self.sql(e, 'to')}",
802                    Jinja: lambda self, e: e.name,
803                    JinjaQuery: lambda self, e: f"{JINJA_QUERY_BEGIN};\n{e.name}\n{JINJA_END};",
804                    JinjaStatement: lambda self,
805                    e: f"{JINJA_STATEMENT_BEGIN};\n{e.name}\n{JINJA_END};",
806                    MacroDef: lambda self, e: f"@DEF({self.sql(e.this)}, {self.sql(e.expression)})",
807                    MacroFunc: _macro_func_sql,
808                    MacroStrReplace: lambda self, e: f"@{self.sql(e.this)}",
809                    MacroSQL: lambda self, e: f"@SQL({self.sql(e.this)})",
810                    MacroVar: lambda self, e: f"@{e.name}",
811                    Metric: lambda self, e: _sqlmesh_ddl_sql(self, e, "METRIC"),
812                    Model: lambda self, e: _sqlmesh_ddl_sql(self, e, "MODEL"),
813                    ModelKind: _model_kind_sql,
814                    PythonCode: lambda self, e: self.expressions(e, sep="\n", indent=False),
815                    StagedFilePath: lambda self, e: self.table_sql(e),
816                }
817            )
818
819            generator.WITH_SEPARATED_COMMENTS = (
820                *generator.WITH_SEPARATED_COMMENTS,
821                Model,
822                MacroDef,
823            )
824
825    _override(Parser, _parse_statement)
826    _override(Parser, _parse_join)
827    _override(Parser, _parse_order)
828    _override(Parser, _parse_where)
829    _override(Parser, _parse_group)
830    _override(Parser, _parse_with)
831    _override(Parser, _parse_having)
832    _override(Parser, _parse_limit)
833    _override(Parser, _parse_lambda)
834    _override(Parser, _parse_types)
835    _override(Parser, _parse_if)
836    _override(Parser, _parse_id_var)
837    _override(Snowflake.Parser, _parse_table_parts)

Extend SQLGlot with SQLMesh's custom macro aware dialect.

def select_from_values( values: List[<MagicMock id='140338269585952'>], columns_to_types: Dict[str, sqlglot.expressions.DataType], batch_size: int = 0, alias: str = 't') -> Iterator[sqlglot.expressions.Select]: View Source

840def select_from_values(
841    values: t.List[PandasNamedTuple],
842    columns_to_types: t.Dict[str, exp.DataType],
843    batch_size: int = 0,
844    alias: str = "t",
845) -> t.Iterator[exp.Select]:
846    """Generate a VALUES expression that has a select wrapped around it to cast the values to their correct types.
847
848    Args:
849        values: List of values to use for the VALUES expression.
850        columns_to_types: Mapping of column names to types to assign to the values.
851        batch_size: The maximum number of tuples per batches. Defaults to sys.maxsize if <= 0.
852        alias: The alias to assign to the values expression. If not provided then will default to "t"
853
854    Returns:
855        This method operates as a generator and yields a VALUES expression.
856    """
857    if batch_size <= 0:
858        batch_size = sys.maxsize
859    num_rows = len(values)
860    for i in range(0, num_rows, batch_size):
861        yield select_from_values_for_batch_range(
862            values=values,
863            columns_to_types=columns_to_types,
864            batch_start=i,
865            batch_end=min(i + batch_size, num_rows),
866            alias=alias,
867        )

Generate a VALUES expression that has a select wrapped around it to cast the values to their correct types.

Arguments:

values: List of values to use for the VALUES expression.
columns_to_types: Mapping of column names to types to assign to the values.
batch_size: The maximum number of tuples per batches. Defaults to sys.maxsize if <= 0.
alias: The alias to assign to the values expression. If not provided then will default to "t"

Returns:

This method operates as a generator and yields a VALUES expression.

def select_from_values_for_batch_range( values: List[<MagicMock id='140338269633600'>], columns_to_types: Dict[str, sqlglot.expressions.DataType], batch_start: int, batch_end: int, alias: str = 't') -> sqlglot.expressions.Select: View Source

870def select_from_values_for_batch_range(
871    values: t.List[PandasNamedTuple],
872    columns_to_types: t.Dict[str, exp.DataType],
873    batch_start: int,
874    batch_end: int,
875    alias: str = "t",
876) -> exp.Select:
877    casted_columns = [
878        exp.alias_(exp.cast(column, to=kind), column, copy=False)
879        for column, kind in columns_to_types.items()
880    ]
881
882    if not values:
883        # Ensures we don't generate an empty VALUES clause & forces a zero-row output
884        where = exp.false()
885        expressions = [tuple(exp.cast(exp.null(), to=kind) for kind in columns_to_types.values())]
886    else:
887        where = None
888        expressions = [
889            tuple(transform_values(v, columns_to_types)) for v in values[batch_start:batch_end]
890        ]
891
892    values_exp = exp.values(expressions, alias=alias, columns=columns_to_types)
893    return exp.select(*casted_columns).from_(values_exp, copy=False).where(where, copy=False)

def pandas_to_sql( df: pandas.core.frame.DataFrame, columns_to_types: Union[Dict[str, sqlglot.expressions.DataType], NoneType] = None, batch_size: int = 0, alias: str = 't') -> Iterator[sqlglot.expressions.Select]: View Source

896def pandas_to_sql(
897    df: pd.DataFrame,
898    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
899    batch_size: int = 0,
900    alias: str = "t",
901) -> t.Iterator[exp.Select]:
902    """Convert a pandas dataframe into a VALUES sql statement.
903
904    Args:
905        df: A pandas dataframe to convert.
906        columns_to_types: Mapping of column names to types to assign to the values.
907        batch_size: The maximum number of tuples per batches. Defaults to sys.maxsize if <= 0.
908        alias: The alias to assign to the values expression. If not provided then will default to "t"
909
910    Returns:
911        This method operates as a generator and yields a VALUES expression.
912    """
913    yield from select_from_values(
914        values=list(df.itertuples(index=False, name=None)),
915        columns_to_types=columns_to_types or columns_to_types_from_df(df),
916        batch_size=batch_size,
917        alias=alias,
918    )

Convert a pandas dataframe into a VALUES sql statement.

Arguments:

df: A pandas dataframe to convert.
columns_to_types: Mapping of column names to types to assign to the values.
batch_size: The maximum number of tuples per batches. Defaults to sys.maxsize if <= 0.
alias: The alias to assign to the values expression. If not provided then will default to "t"

Returns:

This method operates as a generator and yields a VALUES expression.

def set_default_catalog( table: 'str | exp.Table', default_catalog: Union[str, NoneType]) -> sqlglot.expressions.Table: View Source

921def set_default_catalog(
922    table: str | exp.Table,
923    default_catalog: t.Optional[str],
924) -> exp.Table:
925    table = exp.to_table(table)
926
927    if default_catalog and not table.catalog and table.db:
928        table.set("catalog", exp.parse_identifier(default_catalog))
929
930    return table

def normalize_model_name( table: 'str | exp.Table | exp.Column', default_catalog: Union[str, NoneType], dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType] = None) -> str: View Source

933def normalize_model_name(
934    table: str | exp.Table | exp.Column,
935    default_catalog: t.Optional[str],
936    dialect: DialectType = None,
937) -> str:
938    if isinstance(table, exp.Column):
939        table = exp.table_(table.this, db=table.args.get("table"), catalog=table.args.get("db"))
940    else:
941        # We are relying on sqlglot's flexible parsing here to accept quotes from other dialects.
942        # Ex: I have a a normalized name of '"my_table"' but the dialect is spark and therefore we should
943        # expect spark quotes to be backticks ('`') instead of double quotes ('"'). sqlglot today is flexible
944        # and will still parse this correctly and we rely on that.
945        table = exp.to_table(table, dialect=dialect)
946
947    table = set_default_catalog(table, default_catalog)
948    # An alternative way to do this is the following: exp.table_name(table, dialect=dialect, identify=True)
949    # This though would result in the names being normalized to the target dialect AND the quotes while the below
950    # approach just normalizes the names.
951    # By just normalizing names and using sqlglot dialect for quotes this makes it easier for dialects that have
952    # compatible normalization strategies but incompatible quoting to still work together without user hassle
953    return exp.table_name(normalize_identifiers(table, dialect=dialect), identify=True)

def find_tables( expression: sqlglot.expressions.Expression, default_catalog: Union[str, NoneType], dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType] = None) -> Set[str]: View Source

956def find_tables(
957    expression: exp.Expression, default_catalog: t.Optional[str], dialect: DialectType = None
958) -> t.Set[str]:
959    """Find all tables referenced in a query.
960
961    Caches the result in the meta field 'tables'.
962
963    Args:
964        expressions: The query to find the tables in.
965        dialect: The dialect to use for normalization of table names.
966
967    Returns:
968        A Set of all the table names.
969    """
970    if TABLES_META not in expression.meta:
971        expression.meta[TABLES_META] = {
972            normalize_model_name(table, default_catalog=default_catalog, dialect=dialect)
973            for scope in traverse_scope(expression)
974            for table in scope.tables
975            if table.name and table.name not in scope.cte_sources
976        }
977    return expression.meta[TABLES_META]

Find all tables referenced in a query.

Caches the result in the meta field 'tables'.

Arguments:

expressions: The query to find the tables in.
dialect: The dialect to use for normalization of table names.

Returns:

A Set of all the table names.

def add_table( node: sqlglot.expressions.Expression, table: str) -> sqlglot.expressions.Expression: View Source

980def add_table(node: exp.Expression, table: str) -> exp.Expression:
981    """Add a table to all columns in an expression."""
982
983    def _transform(node: exp.Expression) -> exp.Expression:
984        if isinstance(node, exp.Column) and not node.table:
985            return exp.column(node.this, table=table)
986        if isinstance(node, exp.Identifier):
987            return exp.column(node, table=table)
988        return node
989
990    return node.transform(_transform)

Add a table to all columns in an expression.

def transform_values( values: Tuple[Any, ...], columns_to_types: Dict[str, sqlglot.expressions.DataType]) -> Iterator[Any]: View Source

 993def transform_values(
 994    values: t.Tuple[t.Any, ...], columns_to_types: t.Dict[str, exp.DataType]
 995) -> t.Iterator[t.Any]:
 996    """Perform transformations on values given columns_to_types."""
 997    for value, col_type in zip(values, columns_to_types.values()):
 998        if col_type.is_type(exp.DataType.Type.JSON):
 999            yield exp.func("PARSE_JSON", f"'{value}'")
1000        elif isinstance(value, dict) and col_type.is_type(*exp.DataType.STRUCT_TYPES):
1001            yield _dict_to_struct(value)
1002        else:
1003            yield value

Perform transformations on values given columns_to_types.

def to_schema(sql_path: 'str | exp.Table') -> sqlglot.expressions.Table: View Source

1006def to_schema(sql_path: str | exp.Table) -> exp.Table:
1007    if isinstance(sql_path, exp.Table) and sql_path.this is None:
1008        return sql_path
1009    table = exp.to_table(sql_path.copy() if isinstance(sql_path, exp.Table) else sql_path)
1010    table.set("catalog", table.args.get("db"))
1011    table.set("db", table.args.get("this"))
1012    table.set("this", None)
1013    return table

def schema_( db: 'exp.Identifier | str', catalog: 't.Optional[exp.Identifier | str]' = None, quoted: Union[bool, NoneType] = None) -> sqlglot.expressions.Table: View Source

1016def schema_(
1017    db: exp.Identifier | str,
1018    catalog: t.Optional[exp.Identifier | str] = None,
1019    quoted: t.Optional[bool] = None,
1020) -> exp.Table:
1021    """Build a Schema.
1022
1023    Args:
1024        db: Database name.
1025        catalog: Catalog name.
1026        quoted: Whether to force quotes on the schema's identifiers.
1027
1028    Returns:
1029        The new Schema instance.
1030    """
1031    return exp.Table(
1032        this=None,
1033        db=exp.to_identifier(db, quoted=quoted) if db else None,
1034        catalog=exp.to_identifier(catalog, quoted=quoted) if catalog else None,
1035    )

Build a Schema.

Arguments:

db: Database name.
catalog: Catalog name.
quoted: Whether to force quotes on the schema's identifiers.

Returns:

The new Schema instance.

def normalize_mapping_schema( schema: Dict, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> sqlglot.schema.MappingSchema: View Source

1038def normalize_mapping_schema(schema: t.Dict, dialect: DialectType) -> MappingSchema:
1039    return MappingSchema(_unquote_schema(schema), dialect=dialect, normalize=False)

@contextmanager

def normalize_and_quote( query: ~E, dialect: str, default_catalog: Union[str, NoneType], quote: bool = True) -> Iterator[~E]: View Source

1059@contextmanager
1060def normalize_and_quote(
1061    query: E, dialect: str, default_catalog: t.Optional[str], quote: bool = True
1062) -> t.Iterator[E]:
1063    qualify_tables(query, catalog=default_catalog, dialect=dialect)
1064    normalize_identifiers(query, dialect=dialect)
1065    yield query
1066    if quote:
1067        quote_identifiers(query, dialect=dialect)