sqlmesh.core.dialect
from __future__ import annotations

import functools
import re
import sys
import typing as t
from contextlib import contextmanager
from difflib import unified_diff
from enum import Enum, auto

import pandas as pd
from sqlglot import Dialect, Generator, ParseError, Parser, Tokenizer, TokenType, exp
from sqlglot.dialects.dialect import DialectType
from sqlglot.dialects.snowflake import Snowflake
from sqlglot.helper import seq_get
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlglot.optimizer.qualify_columns import quote_identifiers
from sqlglot.optimizer.qualify_tables import qualify_tables
from sqlglot.optimizer.scope import traverse_scope
from sqlglot.schema import MappingSchema
from sqlglot.tokens import Token

from sqlmesh.core.constants import MAX_MODEL_DEFINITION_SIZE
from sqlmesh.utils.errors import SQLMeshError
from sqlmesh.utils.pandas import columns_to_types_from_df

if t.TYPE_CHECKING:
    from sqlglot._typing import E

    from sqlmesh.utils.pandas import PandasNamedTuple

SQLMESH_MACRO_PREFIX = "@"

TABLES_META = "sqlmesh.tables"


class Model(exp.Expression):
    arg_types = {"expressions": True}


class Audit(exp.Expression):
    arg_types = {"expressions": True}


class Metric(exp.Expression):
    arg_types = {"expressions": True}


class Jinja(exp.Func):
    arg_types = {"this": True}


class JinjaQuery(Jinja):
    pass


class JinjaStatement(Jinja):
    pass


class ModelKind(exp.Expression):
    arg_types = {"this": True, "expressions": False}


class MacroVar(exp.Var):
    pass


class MacroFunc(exp.Func):
    @property
    def name(self) -> str:
        return self.this.name


class MacroDef(MacroFunc):
    arg_types = {"this": True, "expression": True}


class MacroSQL(MacroFunc):
    arg_types = {"this": True, "into": False}


class MacroStrReplace(MacroFunc):
    pass


class PythonCode(exp.Expression):
    arg_types = {"expressions": True}


class DColonCast(exp.Cast):
    pass


class MetricAgg(exp.AggFunc):
    """Used for computing metrics."""

    arg_types = {"this": True}

    @property
    def output_name(self) -> str:
        return self.this.name


class StagedFilePath(exp.Table):
    """Represents paths to "staged files" in Snowflake."""


def _parse_statement(self: Parser) -> t.Optional[exp.Expression]:
    if self._curr is None:
        return None

    parser = PARSERS.get(self._curr.text.upper())

    if parser:
        # Capture any available description in the form of a comment
        comments = self._curr.comments

        index = self._index
        try:
            self._advance()
            meta = self._parse_wrapped(lambda: t.cast(t.Callable, parser)(self))
        except ParseError:
            self._retreat(index)

        # Only return the DDL expression if we actually managed to parse one. This is
        # done in order to allow parsing standalone identifiers / function calls like
        # "metric", or "model(1, 2, 3)", which collide with SQLMesh's DDL syntax.
        if self._index != index:
            meta.comments = comments
            return meta

    return self.__parse_statement()  # type: ignore


def _parse_lambda(self: Parser, alias: bool = False) -> t.Optional[exp.Expression]:
    node = self.__parse_lambda(alias=alias)  # type: ignore
    if isinstance(node, exp.Lambda):
        node.set("this", self._parse_alias(node.this))
    return node


def _parse_id_var(
    self: Parser,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    if self._prev and self._prev.text == SQLMESH_MACRO_PREFIX and self._match(TokenType.L_BRACE):
        identifier = self.__parse_id_var(any_token=any_token, tokens=tokens)  # type: ignore
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")
        identifier.args["this"] = f"@{{{identifier.name}}}"
    else:
        identifier = self.__parse_id_var(any_token=any_token, tokens=tokens)  # type: ignore

    while (
        identifier
        and self._is_connected()
        and (
            self._match_texts(("{", SQLMESH_MACRO_PREFIX))
            or self._curr.token_type not in self.RESERVED_TOKENS
        )
    ):
        this = identifier.name
        brace = False

        if self._prev.text == "{":
            this += "{"
            brace = True
        else:
            if self._prev.text == SQLMESH_MACRO_PREFIX:
                this += "@"
            if self._match(TokenType.L_BRACE):
                this += "{"
                brace = True

        next_id = self._parse_id_var(any_token=False)

        if next_id:
            this += next_id.name
        else:
            return identifier

        if brace:
            if self._match(TokenType.R_BRACE):
                this += "}"
            else:
                self.raise_error("Expecting }")

        identifier = self.expression(exp.Identifier, this=this, quoted=identifier.quoted)

    return identifier


def _parse_macro(self: Parser, keyword_macro: str = "") -> t.Optional[exp.Expression]:
    if self._prev.text != SQLMESH_MACRO_PREFIX:
        return self._parse_parameter()

    comments = self._prev.comments
    index = self._index
    field = self._parse_primary() or self._parse_function(functions={}) or self._parse_id_var()

    if isinstance(field, exp.Func):
        macro_name = field.name.upper()
        if macro_name != keyword_macro and macro_name in KEYWORD_MACROS:
            self._retreat(index)
            return None

        if isinstance(field, exp.Anonymous):
            if macro_name == "DEF":
                return self.expression(
                    MacroDef,
                    this=field.expressions[0],
                    expression=field.expressions[1],
                    comments=comments,
                )
            if macro_name == "SQL":
                into = field.expressions[1].this.lower() if len(field.expressions) > 1 else None
                return self.expression(
                    MacroSQL, this=field.expressions[0], into=into, comments=comments
                )
        else:
            field = self.expression(
                exp.Anonymous,
                this=field.sql_name(),
                expressions=list(field.args.values()),
                comments=comments,
            )

        return self.expression(MacroFunc, this=field, comments=comments)

    if field is None:
        return None

    if field.is_string or (isinstance(field, exp.Identifier) and field.quoted):
        return self.expression(
            MacroStrReplace, this=exp.Literal.string(field.this), comments=comments
        )

    if "@" in field.this:
        return field
    return self.expression(MacroVar, this=field.this, comments=comments)


KEYWORD_MACROS = {"WITH", "JOIN", "WHERE", "GROUP_BY", "HAVING", "ORDER_BY", "LIMIT"}


def _parse_matching_macro(self: Parser, name: str) -> t.Optional[exp.Expression]:
    if not self._match_pair(TokenType.PARAMETER, TokenType.VAR, advance=False) or (
        self._next and self._next.text.upper() != name.upper()
    ):
        return None

    self._advance()
    return _parse_macro(self, keyword_macro=name)


def _parse_body_macro(self: Parser) -> t.Tuple[str, t.Optional[exp.Expression]]:
    name = self._next and self._next.text.upper()

    if name == "JOIN":
        return ("joins", self._parse_join())
    if name == "WHERE":
        return ("where", self._parse_where())
    if name == "GROUP_BY":
        return ("group", self._parse_group())
    if name == "HAVING":
        return ("having", self._parse_having())
    if name == "ORDER_BY":
        return ("order", self._parse_order())
    if name == "LIMIT":
        return ("limit", self._parse_limit())
    return ("", None)


def _parse_with(self: Parser, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
    macro = _parse_matching_macro(self, "WITH")
    if not macro:
        return self.__parse_with(skip_with_token=skip_with_token)  # type: ignore

    macro.this.append("expressions", self.__parse_with(skip_with_token=True))  # type: ignore
    return macro


def _parse_join(
    self: Parser, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Expression]:
    index = self._index
    method, side, kind = self._parse_join_parts()
    macro = _parse_matching_macro(self, "JOIN")
    if not macro:
        self._retreat(index)
        return self.__parse_join(skip_join_token=skip_join_token, parse_bracket=parse_bracket)  # type: ignore

    join = self.__parse_join(skip_join_token=True)  # type: ignore
    if method:
        join.set("method", method.text)
    if side:
        join.set("side", side.text)
    if kind:
        join.set("kind", kind.text)

    macro.this.append("expressions", join)
    return macro


def _parse_where(self: Parser, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
    macro = _parse_matching_macro(self, "WHERE")
    if not macro:
        return self.__parse_where(skip_where_token=skip_where_token)  # type: ignore

    macro.this.append("expressions", self.__parse_where(skip_where_token=True))  # type: ignore
    return macro


def _parse_group(self: Parser, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
    macro = _parse_matching_macro(self, "GROUP_BY")
    if not macro:
        return self.__parse_group(skip_group_by_token=skip_group_by_token)  # type: ignore

    macro.this.append("expressions", self.__parse_group(skip_group_by_token=True))  # type: ignore
    return macro


def _parse_having(self: Parser, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
    macro = _parse_matching_macro(self, "HAVING")
    if not macro:
        return self.__parse_having(skip_having_token=skip_having_token)  # type: ignore

    macro.this.append("expressions", self.__parse_having(skip_having_token=True))  # type: ignore
    return macro


def _parse_order(
    self: Parser, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
) -> t.Optional[exp.Expression]:
    macro = _parse_matching_macro(self, "ORDER_BY")
    if not macro:
        return self.__parse_order(this, skip_order_token=skip_order_token)  # type: ignore

    macro.this.append("expressions", self.__parse_order(this, skip_order_token=True))  # type: ignore
    return macro


def _parse_limit(
    self: Parser,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    macro = _parse_matching_macro(self, "TOP" if top else "LIMIT")
    if not macro:
        return self.__parse_limit(this, top=top, skip_limit_token=skip_limit_token)  # type: ignore

    macro.this.append("expressions", self.__parse_limit(this, top=top, skip_limit_token=True))  # type: ignore
    return macro


def _parse_props(self: Parser) -> t.Optional[exp.Expression]:
    key = self._parse_id_var(any_token=True)
    if not key:
        return None

    name = key.name.lower()
    if name == "when_matched":
        value: t.Optional[exp.Expression] = self._parse_when_matched()[0]
    elif self._match(TokenType.L_PAREN):
        value = self.expression(exp.Tuple, expressions=self._parse_csv(self._parse_equality))
        self._match_r_paren()
    else:
        value = self._parse_bracket(self._parse_field(any_token=True))

    if name == "path" and value:
        # Make sure that if we get a Windows path, it is converted to POSIX
        value = exp.Literal.string(value.this.replace("\\", "/"))

    return self.expression(exp.Property, this=name, value=value)


def _parse_types(
    self: Parser,
    check_func: bool = False,
    schema: bool = False,
    allow_identifiers: bool = True,
) -> t.Optional[exp.Expression]:
    start = self._curr
    parsed_type = self.__parse_types(  # type: ignore
        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
    )

    if schema and parsed_type:
        parsed_type.meta["sql"] = self._find_sql(start, self._prev)

    return parsed_type


# Only needed for Snowflake: its "staged file" syntax (@<path>) clashes with our macro
# var syntax. By converting the Var representation to a MacroVar, we should be able to
# handle both use cases: if there's no value in the MacroEvaluator's context for that
# MacroVar, it'll render into @<path>, so it won't break staged file path references.
#
# See: https://docs.snowflake.com/en/user-guide/querying-stage
def _parse_table_parts(
    self: Parser, schema: bool = False, is_db_reference: bool = False
) -> exp.Table:
    index = self._index
    table = self.__parse_table_parts(schema=schema, is_db_reference=is_db_reference)  # type: ignore
    table_arg = table.this

    if isinstance(table_arg, exp.Var) and table_arg.name.startswith(SQLMESH_MACRO_PREFIX):
        # Macro functions do not clash with the staged file syntax, so we can safely parse them
        if "(" in table_arg.name:
            self._retreat(index)
            return Parser._parse_table_parts(self, schema=schema, is_db_reference=is_db_reference)

        table_arg.replace(MacroVar(this=table_arg.name[1:]))
        return StagedFilePath(**table.args)

    return table


def _parse_if(self: Parser) -> t.Optional[exp.Expression]:
    # If we fail to parse an IF function with expressions as arguments, we then try
    # to parse a statement / command to support the macro @IF(condition, statement)
    index = self._index
    try:
        return self.__parse_if()  # type: ignore
    except ParseError:
        self._retreat(index)
        self._match_l_paren()

        cond = self._parse_conjunction()
        self._match(TokenType.COMMA)

        # Try to parse a known statement, otherwise fall back to parsing a command
        index = self._index
        statement = self._parse_statement()
        if isinstance(statement, exp.Command):
            self._retreat(index)
            statement = self._parse_as_command(self._curr)

            # Unconsume the right parenthesis as well as omit it from the command's text
            self._retreat(self._index - 1)
            statement.set("expression", statement.expression[:-1])

        # Return anonymous so that _parse_macro can create a MacroFunc with this value
        self._match_r_paren()
        return exp.Anonymous(this="IF", expressions=[cond, statement])


def _create_parser(parser_type: t.Type[exp.Expression], table_keys: t.List[str]) -> t.Callable:
    def parse(self: Parser) -> t.Optional[exp.Expression]:
        from sqlmesh.core.model.kind import ModelKindName

        expressions: t.List[exp.Expression] = []

        while True:
            prev_property = seq_get(expressions, -1)
            if not self._match(TokenType.COMMA, expression=prev_property) and expressions:
                break

            key_expression = self._parse_id_var(any_token=True)
            if not key_expression:
                break

            # This allows macro functions that programmatically generate the property key-value pair
            if isinstance(key_expression, MacroFunc):
                expressions.append(key_expression)
                continue

            key = key_expression.name.lower()

            start = self._curr
            value: t.Optional[exp.Expression | str]

            if key in table_keys:
                value = self._parse_table_parts()
            elif key == "columns":
                value = self._parse_schema()
            elif key == "kind":
                id_var = self._parse_id_var(any_token=True)
                if not id_var:
                    value = None
                else:
                    kind = ModelKindName[id_var.name.upper()]

                    if kind in (
                        ModelKindName.INCREMENTAL_BY_TIME_RANGE,
                        ModelKindName.INCREMENTAL_BY_UNIQUE_KEY,
                        ModelKindName.SEED,
                        ModelKindName.VIEW,
                        ModelKindName.SCD_TYPE_2,
                        ModelKindName.SCD_TYPE_2_BY_TIME,
                        ModelKindName.SCD_TYPE_2_BY_COLUMN,
                    ) and self._match(TokenType.L_PAREN, advance=False):
                        props = self._parse_wrapped_csv(functools.partial(_parse_props, self))
                    else:
                        props = None

                    value = self.expression(
                        ModelKind,
                        this=kind.value,
                        expressions=props,
                    )
            elif key == "expression":
                value = self._parse_conjunction()
            else:
                value = self._parse_bracket(self._parse_field(any_token=True))

            if isinstance(value, exp.Expression):
                value.meta["sql"] = self._find_sql(start, self._prev)

            expressions.append(self.expression(exp.Property, this=key, value=value))

        return self.expression(parser_type, expressions=expressions)

    return parse


PARSERS = {
    "MODEL": _create_parser(Model, ["name"]),
    "AUDIT": _create_parser(Audit, ["model"]),
    "METRIC": _create_parser(Metric, ["name"]),
}


def _props_sql(self: Generator, expressions: t.List[exp.Expression]) -> str:
    props = []
    size = len(expressions)

    for i, prop in enumerate(expressions):
        sql = self.indent(f"{prop.name} {self.sql(prop, 'value')}")

        if i < size - 1:
            sql += ","
        props.append(self.maybe_comment(sql, expression=prop))

    return "\n".join(props)


def _sqlmesh_ddl_sql(self: Generator, expression: Model | Audit | Metric, name: str) -> str:
    return "\n".join([f"{name} (", _props_sql(self, expression.expressions), ")"])


def _model_kind_sql(self: Generator, expression: ModelKind) -> str:
    props = _props_sql(self, expression.expressions)
    if props:
        return "\n".join([f"{expression.this} (", props, ")"])
    return expression.name.upper()


def _macro_keyword_func_sql(self: Generator, expression: exp.Expression) -> str:
    name = expression.name
    keyword = name.replace("_", " ")
    *args, clause = expression.expressions
    macro = f"@{name}({self.format_args(*args)})"
    return self.sql(clause).replace(keyword, macro, 1)


def _macro_func_sql(self: Generator, expression: MacroFunc) -> str:
    expression = expression.this
    name = expression.name
    if name in KEYWORD_MACROS:
        return _macro_keyword_func_sql(self, expression)
    return f"@{name}({self.format_args(*expression.expressions)})"


def _override(klass: t.Type[Tokenizer | Parser], func: t.Callable) -> None:
    name = func.__name__
    setattr(klass, f"_{name}", getattr(klass, name))
    setattr(klass, name, func)


def format_model_expressions(
    expressions: t.List[exp.Expression], dialect: t.Optional[str] = None, **kwargs: t.Any
) -> str:
    """Format a model's expressions into a standardized format.

    Args:
        expressions: The model's expressions, must be at least model def + query.
        dialect: The dialect to render the expressions as.
        **kwargs: Additional keyword arguments to pass to the sql generator.

    Returns:
        A string representing the formatted model.
    """
    if len(expressions) == 1:
        return expressions[0].sql(pretty=True, dialect=dialect)

    *statements, query = expressions

    def cast_to_colon(node: exp.Expression) -> exp.Expression:
        if isinstance(node, exp.Cast) and not any(
            # Only convert CAST into :: if it doesn't have additional args set, otherwise this
            # conversion could alter the semantics (eg. changing SAFE_CAST in BigQuery to CAST)
            arg
            for name, arg in node.args.items()
            if name not in ("this", "to")
        ):
            this = node.this

            if not isinstance(this, (exp.Binary, exp.Unary)) or isinstance(this, exp.Paren):
                cast = DColonCast(this=this, to=node.to)
                cast.comments = node.comments
                node = cast

        exp.replace_children(node, cast_to_colon)
        return node

    query = query.copy()
    exp.replace_children(query, cast_to_colon)

    return ";\n\n".join(
        [
            *(statement.sql(pretty=True, dialect=dialect, **kwargs) for statement in statements),
            query.sql(pretty=True, dialect=dialect, **kwargs),
        ]
    ).strip()


def text_diff(
    a: t.List[exp.Expression],
    b: t.List[exp.Expression],
    a_dialect: t.Optional[str] = None,
    b_dialect: t.Optional[str] = None,
) -> str:
    """Find the unified text diff between two lists of expressions."""
    a_sql = [
        line
        for expr in a
        for line in expr.sql(pretty=True, comments=False, dialect=a_dialect).split("\n")
    ]
    b_sql = [
        line
        for expr in b
        for line in expr.sql(pretty=True, comments=False, dialect=b_dialect).split("\n")
    ]
    return "\n".join(unified_diff(a_sql, b_sql))


DIALECT_PATTERN = re.compile(
    r"(model|audit).*?\(.*?dialect[^a-z,]+([a-z]*|,)", re.IGNORECASE | re.DOTALL
)


def _is_command_statement(command: str, tokens: t.List[Token], pos: int) -> bool:
    try:
        return (
            tokens[pos].text.upper() == command.upper()
            and tokens[pos + 1].token_type == TokenType.SEMICOLON
        )
    except IndexError:
        return False


JINJA_QUERY_BEGIN = "JINJA_QUERY_BEGIN"
JINJA_STATEMENT_BEGIN = "JINJA_STATEMENT_BEGIN"
JINJA_END = "JINJA_END"


def _is_jinja_statement_begin(tokens: t.List[Token], pos: int) -> bool:
    return _is_command_statement(JINJA_STATEMENT_BEGIN, tokens, pos)


def _is_jinja_query_begin(tokens: t.List[Token], pos: int) -> bool:
    return _is_command_statement(JINJA_QUERY_BEGIN, tokens, pos)


def _is_jinja_end(tokens: t.List[Token], pos: int) -> bool:
    return _is_command_statement(JINJA_END, tokens, pos)


def jinja_query(query: str) -> JinjaQuery:
    return JinjaQuery(this=exp.Literal.string(query.strip()))


def jinja_statement(statement: str) -> JinjaStatement:
    return JinjaStatement(this=exp.Literal.string(statement.strip()))


class ChunkType(Enum):
    JINJA_QUERY = auto()
    JINJA_STATEMENT = auto()
    SQL = auto()


def parse_one(
    sql: str, dialect: t.Optional[str] = None, into: t.Optional[exp.IntoType] = None
) -> exp.Expression:
    expressions = parse(sql, default_dialect=dialect, match_dialect=False, into=into)
    if not expressions:
        raise SQLMeshError(f"No expressions found in '{sql}'")
    elif len(expressions) > 1:
        raise SQLMeshError(f"Multiple expressions found in '{sql}'")
    return expressions[0]


def parse(
    sql: str,
    default_dialect: t.Optional[str] = None,
    match_dialect: bool = True,
    into: t.Optional[exp.IntoType] = None,
) -> t.List[exp.Expression]:
    """Parse a sql string.

    Supports parsing model definitions.
    If a jinja block is detected, the query is stored as a raw string in a Jinja node.

    Args:
        sql: The sql based definition.
        default_dialect: The dialect to use if the model does not specify one.

    Returns:
        A list of the parsed expressions: [Model, *Statements, Query, *Statements]
    """
    match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE])
    dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect)

    tokens = dialect.tokenizer.tokenize(sql)
    chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)]
    total = len(tokens)

    pos = 0
    while pos < total:
        token = tokens[pos]
        if _is_jinja_end(tokens, pos) or (
            chunks[-1][1] == ChunkType.SQL
            and token.token_type == TokenType.SEMICOLON
            and pos < total - 1
        ):
            if token.token_type == TokenType.SEMICOLON:
                pos += 1
            else:
                # Jinja end statement
                chunks[-1][0].append(token)
                pos += 2
            chunks.append(([], ChunkType.SQL))
        elif _is_jinja_query_begin(tokens, pos):
            chunks.append(([token], ChunkType.JINJA_QUERY))
            pos += 2
        elif _is_jinja_statement_begin(tokens, pos):
            chunks.append(([token], ChunkType.JINJA_STATEMENT))
            pos += 2
        else:
            chunks[-1][0].append(token)
            pos += 1

    parser = dialect.parser()
    expressions: t.List[exp.Expression] = []

    for chunk, chunk_type in chunks:
        if chunk_type == ChunkType.SQL:
            parsed_expressions: t.List[t.Optional[exp.Expression]] = (
                parser.parse(chunk, sql) if into is None else parser.parse_into(into, chunk, sql)
            )
            for expression in parsed_expressions:
                if expression:
                    expression.meta["sql"] = parser._find_sql(chunk[0], chunk[-1])
                    expressions.append(expression)
        else:
            start, *_, end = chunk
            segment = sql[start.end + 2 : end.start - 1]
            factory = jinja_query if chunk_type == ChunkType.JINJA_QUERY else jinja_statement
            expression = factory(segment.strip())
            expression.meta["sql"] = sql[start.start : end.end + 1]
            expressions.append(expression)

    return expressions


def extend_sqlglot() -> None:
    """Extend SQLGlot with SQLMesh's custom macro-aware dialect."""
    tokenizers = {Tokenizer}
    parsers = {Parser}
    generators = {Generator}

    for dialect in Dialect.classes.values():
        if hasattr(dialect, "Tokenizer"):
            tokenizers.add(dialect.Tokenizer)
        if hasattr(dialect, "Parser"):
            parsers.add(dialect.Parser)
        if hasattr(dialect, "Generator"):
            generators.add(dialect.Generator)

    for tokenizer in tokenizers:
        tokenizer.VAR_SINGLE_TOKENS.update(SQLMESH_MACRO_PREFIX)

    for parser in parsers:
        parser.FUNCTIONS.update({"JINJA": Jinja.from_arg_list, "METRIC": MetricAgg.from_arg_list})
        parser.PLACEHOLDER_PARSERS.update({TokenType.PARAMETER: _parse_macro})
        parser.QUERY_MODIFIER_PARSERS.update(
            {TokenType.PARAMETER: lambda self: _parse_body_macro(self)}
        )

    for generator in generators:
        if MacroFunc not in generator.TRANSFORMS:
            generator.TRANSFORMS.update(
                {
                    Audit: lambda self, e: _sqlmesh_ddl_sql(self, e, "Audit"),
                    DColonCast: lambda self, e: f"{self.sql(e, 'this')}::{self.sql(e, 'to')}",
                    Jinja: lambda self, e: e.name,
                    JinjaQuery: lambda self, e: f"{JINJA_QUERY_BEGIN};\n{e.name}\n{JINJA_END};",
                    JinjaStatement: lambda self,
                    e: f"{JINJA_STATEMENT_BEGIN};\n{e.name}\n{JINJA_END};",
                    MacroDef: lambda self, e: f"@DEF({self.sql(e.this)}, {self.sql(e.expression)})",
                    MacroFunc: _macro_func_sql,
                    MacroStrReplace: lambda self, e: f"@{self.sql(e.this)}",
                    MacroSQL: lambda self, e: f"@SQL({self.sql(e.this)})",
                    MacroVar: lambda self, e: f"@{e.name}",
                    Metric: lambda self, e: _sqlmesh_ddl_sql(self, e, "METRIC"),
                    Model: lambda self, e: _sqlmesh_ddl_sql(self, e, "MODEL"),
                    ModelKind: _model_kind_sql,
                    PythonCode: lambda self, e: self.expressions(e, sep="\n", indent=False),
                    StagedFilePath: lambda self, e: self.table_sql(e),
                }
            )

        generator.WITH_SEPARATED_COMMENTS = (
            *generator.WITH_SEPARATED_COMMENTS,
            Model,
            MacroDef,
        )

    _override(Parser, _parse_statement)
    _override(Parser, _parse_join)
    _override(Parser, _parse_order)
    _override(Parser, _parse_where)
    _override(Parser, _parse_group)
    _override(Parser, _parse_with)
    _override(Parser, _parse_having)
    _override(Parser, _parse_limit)
    _override(Parser, _parse_lambda)
    _override(Parser, _parse_types)
    _override(Parser, _parse_if)
    _override(Parser, _parse_id_var)
    _override(Snowflake.Parser, _parse_table_parts)


def select_from_values(
    values: t.List[PandasNamedTuple],
    columns_to_types: t.Dict[str, exp.DataType],
    batch_size: int = 0,
    alias: str = "t",
) -> t.Iterator[exp.Select]:
    """Generate a VALUES expression that has a select wrapped around it to cast the values to their correct types.

    Args:
        values: List of values to use for the VALUES expression.
        columns_to_types: Mapping of column names to types to assign to the values.
        batch_size: The maximum number of tuples per batch. Defaults to sys.maxsize if <= 0.
        alias: The alias to assign to the values expression. Defaults to "t".

    Returns:
        This method operates as a generator and yields a VALUES expression.
    """
    if batch_size <= 0:
        batch_size = sys.maxsize
    num_rows = len(values)
    for i in range(0, num_rows, batch_size):
        yield select_from_values_for_batch_range(
            values=values,
            columns_to_types=columns_to_types,
            batch_start=i,
            batch_end=min(i + batch_size, num_rows),
            alias=alias,
        )


def select_from_values_for_batch_range(
    values: t.List[PandasNamedTuple],
    columns_to_types: t.Dict[str, exp.DataType],
    batch_start: int,
    batch_end: int,
    alias: str = "t",
) -> exp.Select:
    casted_columns = [
        exp.alias_(exp.cast(column, to=kind), column, copy=False)
        for column, kind in columns_to_types.items()
    ]

    if not values:
        # Ensures we don't generate an empty VALUES clause & forces a zero-row output
        where = exp.false()
        expressions = [tuple(exp.cast(exp.null(), to=kind) for kind in columns_to_types.values())]
    else:
        where = None
        expressions = [
            tuple(transform_values(v, columns_to_types)) for v in values[batch_start:batch_end]
        ]

    values_exp = exp.values(expressions, alias=alias, columns=columns_to_types)
    return exp.select(*casted_columns).from_(values_exp, copy=False).where(where, copy=False)


def pandas_to_sql(
    df: pd.DataFrame,
    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
    batch_size: int = 0,
    alias: str = "t",
) -> t.Iterator[exp.Select]:
    """Convert a pandas dataframe into a VALUES sql statement.

    Args:
        df: A pandas dataframe to convert.
        columns_to_types: Mapping of column names to types to assign to the values.
        batch_size: The maximum number of tuples per batch. Defaults to sys.maxsize if <= 0.
        alias: The alias to assign to the values expression. Defaults to "t".

    Returns:
        This method operates as a generator and yields a VALUES expression.
    """
    yield from select_from_values(
        values=list(df.itertuples(index=False, name=None)),
        columns_to_types=columns_to_types or columns_to_types_from_df(df),
        batch_size=batch_size,
        alias=alias,
    )


def set_default_catalog(
    table: str | exp.Table,
    default_catalog: t.Optional[str],
) -> exp.Table:
    table = exp.to_table(table)

    if default_catalog and not table.catalog and table.db:
        table.set("catalog", exp.parse_identifier(default_catalog))

    return table


def normalize_model_name(
    table: str | exp.Table | exp.Column,
    default_catalog: t.Optional[str],
    dialect: DialectType = None,
) -> str:
    if isinstance(table, exp.Column):
        table = exp.table_(table.this, db=table.args.get("table"), catalog=table.args.get("db"))
    else:
        # We are relying on sqlglot's flexible parsing here to accept quotes from other dialects.
        # Ex: we have a normalized name of '"my_table"' but the dialect is Spark, whose quote
        # character is the backtick ('`') rather than the double quote ('"'). sqlglot is flexible
        # and will still parse this correctly, and we rely on that.
        table = exp.to_table(table, dialect=dialect)

    table = set_default_catalog(table, default_catalog)
    # An alternative way to do this is the following: exp.table_name(table, dialect=dialect, identify=True)
    # That, however, would normalize both the names AND the quotes to the target dialect, while the
    # approach below normalizes just the names.
    # By normalizing only the names and using the sqlglot dialect for quotes, dialects that have
    # compatible normalization strategies but incompatible quoting can still work together without user hassle.
    return exp.table_name(normalize_identifiers(table, dialect=dialect), identify=True)


def find_tables(
    expression: exp.Expression, default_catalog: t.Optional[str], dialect: DialectType = None
) -> t.Set[str]:
    """Find all tables referenced in a query.

    Caches the result in the meta field 'tables'.

    Args:
        expression: The query to find the tables in.
        dialect: The dialect to use for normalization of table names.

    Returns:
        A set of all the table names.
    """
    if TABLES_META not in expression.meta:
        expression.meta[TABLES_META] = {
            normalize_model_name(table, default_catalog=default_catalog, dialect=dialect)
            for scope in traverse_scope(expression)
            for table in scope.tables
            if table.name and table.name not in scope.cte_sources
        }
    return expression.meta[TABLES_META]


def add_table(node: exp.Expression, table: str) -> exp.Expression:
    """Add a table to all columns in an expression."""

    def _transform(node: exp.Expression) -> exp.Expression:
        if isinstance(node, exp.Column) and not node.table:
            return exp.column(node.this, table=table)
        if isinstance(node, exp.Identifier):
            return exp.column(node, table=table)
        return node

    return node.transform(_transform)


def transform_values(
    values: t.Tuple[t.Any, ...], columns_to_types: t.Dict[str, exp.DataType]
) -> t.Iterator[t.Any]:
    """Perform transformations on values given columns_to_types."""
    for value, col_type in zip(values, columns_to_types.values()):
        if col_type.is_type(exp.DataType.Type.JSON):
            yield exp.func("PARSE_JSON", f"'{value}'")
        elif isinstance(value, dict) and col_type.is_type(*exp.DataType.STRUCT_TYPES):
            yield _dict_to_struct(value)
        else:
            yield value


def to_schema(sql_path: str | exp.Table) -> exp.Table:
    if isinstance(sql_path, exp.Table) and sql_path.this is None:
        return sql_path
    table = exp.to_table(sql_path.copy() if isinstance(sql_path, exp.Table) else sql_path)
    table.set("catalog", table.args.get("db"))
    table.set("db", table.args.get("this"))
    table.set("this", None)
    return table


def schema_(
    db: exp.Identifier | str,
    catalog: t.Optional[exp.Identifier | str] = None,
    quoted: t.Optional[bool] = None,
) -> exp.Table:
    """Build a Schema.

    Args:
        db: Database name.
        catalog: Catalog name.
        quoted: Whether to force quotes on the schema's identifiers.

    Returns:
        The new Schema instance.
    """
    return exp.Table(
        this=None,
        db=exp.to_identifier(db, quoted=quoted) if db else None,
        catalog=exp.to_identifier(catalog, quoted=quoted) if catalog else None,
    )


def normalize_mapping_schema(schema: t.Dict, dialect: DialectType) -> MappingSchema:
    return MappingSchema(_unquote_schema(schema), dialect=dialect, normalize=False)


def _unquote_schema(schema: t.Dict) -> t.Dict:
    """SQLGlot schema expects unquoted normalized keys."""
    return {
        k.strip('"'): _unquote_schema(v) if isinstance(v, dict) else v for k, v in schema.items()
    }


def _dict_to_struct(values: t.Dict) -> exp.Struct:
    expressions = []
    for key, value in values.items():
        key = exp.to_identifier(key)
        value = _dict_to_struct(value) if isinstance(value, dict) else exp.convert(value)
        expressions.append(exp.PropertyEQ(this=key, expression=value))

    return exp.Struct(expressions=expressions)


@contextmanager
def normalize_and_quote(
    query: E, dialect: str, default_catalog: t.Optional[str], quote: bool = True
) -> t.Iterator[E]:
    qualify_tables(query, catalog=default_catalog, dialect=dialect)
    normalize_identifiers(query, dialect=dialect)
    yield query
    if quote:
        quote_identifiers(query, dialect=dialect)
class MetricAgg(exp.AggFunc):
    """Used for computing metrics."""

    arg_types = {"this": True}

    @property
    def output_name(self) -> str:
        return self.this.name
Used for computing metrics.
Name of the output column if this expression is a selection.
If the Expression has no output name, an empty string is returned.
Example:
    >>> from sqlglot import parse_one
    >>> parse_one("SELECT a").expressions[0].output_name
    'a'
    >>> parse_one("SELECT b AS c").expressions[0].output_name
    'c'
    >>> parse_one("SELECT 1 + 2").expressions[0].output_name
    ''
StagedFilePath: Represents paths to "staged files" in Snowflake.
def format_model_expressions(
    expressions: t.List[exp.Expression], dialect: t.Optional[str] = None, **kwargs: t.Any
) -> str:
    """Format a model's expressions into a standardized format.

    Args:
        expressions: The model's expressions, must be at least model def + query.
        dialect: The dialect to render the expressions as.
        **kwargs: Additional keyword arguments to pass to the sql generator.

    Returns:
        A string representing the formatted model.
    """
    if len(expressions) == 1:
        return expressions[0].sql(pretty=True, dialect=dialect)

    *statements, query = expressions

    def cast_to_colon(node: exp.Expression) -> exp.Expression:
        if isinstance(node, exp.Cast) and not any(
            # Only convert CAST into :: if it doesn't have additional args set, otherwise this
            # conversion could alter the semantics (eg. changing SAFE_CAST in BigQuery to CAST)
            arg
            for name, arg in node.args.items()
            if name not in ("this", "to")
        ):
            this = node.this

            if not isinstance(this, (exp.Binary, exp.Unary)) or isinstance(this, exp.Paren):
                cast = DColonCast(this=this, to=node.to)
                cast.comments = node.comments
                node = cast

        exp.replace_children(node, cast_to_colon)
        return node

    query = query.copy()
    exp.replace_children(query, cast_to_colon)

    return ";\n\n".join(
        [
            *(statement.sql(pretty=True, dialect=dialect, **kwargs) for statement in statements),
            query.sql(pretty=True, dialect=dialect, **kwargs),
        ]
    ).strip()
Format a model's expressions into a standardized format.
Arguments:
- expressions: The model's expressions, must be at least model def + query.
- dialect: The dialect to render the expressions as.
- **kwargs: Additional keyword arguments to pass to the sql generator.
Returns:
A string representing the formatted model.
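For illustration, a minimal sketch (the model name db.orders is made up, and SQLMesh's dialect extensions are assumed to be active, which importing sqlmesh takes care of):

    from sqlmesh.core.dialect import format_model_expressions, parse

    expressions = parse(
        "MODEL(name db.orders, kind FULL); SELECT CAST(1 AS INT) AS id",
        default_dialect="duckdb",
    )
    print(format_model_expressions(expressions, dialect="duckdb"))
    # Roughly:
    # MODEL (
    #   name db.orders,
    #   kind FULL
    # );
    #
    # SELECT
    #   1::INT AS id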
def text_diff(
    a: t.List[exp.Expression],
    b: t.List[exp.Expression],
    a_dialect: t.Optional[str] = None,
    b_dialect: t.Optional[str] = None,
) -> str:
    """Find the unified text diff between two lists of expressions."""
    a_sql = [
        line
        for expr in a
        for line in expr.sql(pretty=True, comments=False, dialect=a_dialect).split("\n")
    ]
    b_sql = [
        line
        for expr in b
        for line in expr.sql(pretty=True, comments=False, dialect=b_dialect).split("\n")
    ]
    return "\n".join(unified_diff(a_sql, b_sql))
Find the unified text diff between two lists of expressions.
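A small usage sketch (the queries are made up):

    from sqlmesh.core.dialect import parse, text_diff

    old = parse("SELECT 1 AS x", default_dialect="duckdb")
    new = parse("SELECT 2 AS x, 3 AS y", default_dialect="duckdb")
    print(text_diff(old, new))  # unified diff of the pretty-printed SQL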
def parse_one(
    sql: str, dialect: t.Optional[str] = None, into: t.Optional[exp.IntoType] = None
) -> exp.Expression:
    expressions = parse(sql, default_dialect=dialect, match_dialect=False, into=into)
    if not expressions:
        raise SQLMeshError(f"No expressions found in '{sql}'")
    elif len(expressions) > 1:
        raise SQLMeshError(f"Multiple expressions found in '{sql}'")
    return expressions[0]
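parse_one enforces that the input contains exactly one expression; a brief sketch:

    from sqlmesh.core.dialect import parse_one
    from sqlmesh.utils.errors import SQLMeshError

    expr = parse_one("SELECT 1 AS x", dialect="duckdb")

    try:
        parse_one("SELECT 1; SELECT 2")
    except SQLMeshError as e:
        print(e)  # Multiple expressions found in 'SELECT 1; SELECT 2'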
def parse(
    sql: str,
    default_dialect: t.Optional[str] = None,
    match_dialect: bool = True,
    into: t.Optional[exp.IntoType] = None,
) -> t.List[exp.Expression]:
    """Parse a sql string.

    Supports parsing model definitions.
    If a jinja block is detected, the query is stored as a raw string in a Jinja node.

    Args:
        sql: The sql based definition.
        default_dialect: The dialect to use if the model does not specify one.

    Returns:
        A list of the parsed expressions: [Model, *Statements, Query, *Statements]
    """
    match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE])
    dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect)

    tokens = dialect.tokenizer.tokenize(sql)
    chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)]
    total = len(tokens)

    pos = 0
    while pos < total:
        token = tokens[pos]
        if _is_jinja_end(tokens, pos) or (
            chunks[-1][1] == ChunkType.SQL
            and token.token_type == TokenType.SEMICOLON
            and pos < total - 1
        ):
            if token.token_type == TokenType.SEMICOLON:
                pos += 1
            else:
                # Jinja end statement
                chunks[-1][0].append(token)
                pos += 2
            chunks.append(([], ChunkType.SQL))
        elif _is_jinja_query_begin(tokens, pos):
            chunks.append(([token], ChunkType.JINJA_QUERY))
            pos += 2
        elif _is_jinja_statement_begin(tokens, pos):
            chunks.append(([token], ChunkType.JINJA_STATEMENT))
            pos += 2
        else:
            chunks[-1][0].append(token)
            pos += 1

    parser = dialect.parser()
    expressions: t.List[exp.Expression] = []

    for chunk, chunk_type in chunks:
        if chunk_type == ChunkType.SQL:
            parsed_expressions: t.List[t.Optional[exp.Expression]] = (
                parser.parse(chunk, sql) if into is None else parser.parse_into(into, chunk, sql)
            )
            for expression in parsed_expressions:
                if expression:
                    expression.meta["sql"] = parser._find_sql(chunk[0], chunk[-1])
                    expressions.append(expression)
        else:
            start, *_, end = chunk
            segment = sql[start.end + 2 : end.start - 1]
            factory = jinja_query if chunk_type == ChunkType.JINJA_QUERY else jinja_statement
            expression = factory(segment.strip())
            expression.meta["sql"] = sql[start.start : end.end + 1]
            expressions.append(expression)

    return expressions
Parse a sql string.
Supports parsing model definitions. If a jinja block is detected, the query is stored as a raw string in a Jinja node.
Arguments:
- sql: The sql based definition.
- default_dialect: The dialect to use if the model does not specify one.
Returns:
A list of the parsed expressions: [Model, *Statements, Query, *Statements]
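A minimal sketch of parsing a model definition (the model db.orders is made up; assumes the SQLMesh dialect extensions are active):

    from sqlmesh.core.dialect import Model, parse

    expressions = parse(
        """
        MODEL (
            name db.orders,
            dialect duckdb,
            kind FULL
        );
        SELECT 1 AS id
        """,
    )
    assert isinstance(expressions[0], Model)  # the MODEL DDL block
    query = expressions[-1]  # the trailing SELECT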
def extend_sqlglot() -> None:
    """Extend SQLGlot with SQLMesh's custom macro-aware dialect."""
    tokenizers = {Tokenizer}
    parsers = {Parser}
    generators = {Generator}

    for dialect in Dialect.classes.values():
        if hasattr(dialect, "Tokenizer"):
            tokenizers.add(dialect.Tokenizer)
        if hasattr(dialect, "Parser"):
            parsers.add(dialect.Parser)
        if hasattr(dialect, "Generator"):
            generators.add(dialect.Generator)

    for tokenizer in tokenizers:
        tokenizer.VAR_SINGLE_TOKENS.update(SQLMESH_MACRO_PREFIX)

    for parser in parsers:
        parser.FUNCTIONS.update({"JINJA": Jinja.from_arg_list, "METRIC": MetricAgg.from_arg_list})
        parser.PLACEHOLDER_PARSERS.update({TokenType.PARAMETER: _parse_macro})
        parser.QUERY_MODIFIER_PARSERS.update(
            {TokenType.PARAMETER: lambda self: _parse_body_macro(self)}
        )

    for generator in generators:
        if MacroFunc not in generator.TRANSFORMS:
            generator.TRANSFORMS.update(
                {
                    Audit: lambda self, e: _sqlmesh_ddl_sql(self, e, "Audit"),
                    DColonCast: lambda self, e: f"{self.sql(e, 'this')}::{self.sql(e, 'to')}",
                    Jinja: lambda self, e: e.name,
                    JinjaQuery: lambda self, e: f"{JINJA_QUERY_BEGIN};\n{e.name}\n{JINJA_END};",
                    JinjaStatement: lambda self,
                    e: f"{JINJA_STATEMENT_BEGIN};\n{e.name}\n{JINJA_END};",
                    MacroDef: lambda self, e: f"@DEF({self.sql(e.this)}, {self.sql(e.expression)})",
                    MacroFunc: _macro_func_sql,
                    MacroStrReplace: lambda self, e: f"@{self.sql(e.this)}",
                    MacroSQL: lambda self, e: f"@SQL({self.sql(e.this)})",
                    MacroVar: lambda self, e: f"@{e.name}",
                    Metric: lambda self, e: _sqlmesh_ddl_sql(self, e, "METRIC"),
                    Model: lambda self, e: _sqlmesh_ddl_sql(self, e, "MODEL"),
                    ModelKind: _model_kind_sql,
                    PythonCode: lambda self, e: self.expressions(e, sep="\n", indent=False),
                    StagedFilePath: lambda self, e: self.table_sql(e),
                }
            )

        generator.WITH_SEPARATED_COMMENTS = (
            *generator.WITH_SEPARATED_COMMENTS,
            Model,
            MacroDef,
        )

    _override(Parser, _parse_statement)
    _override(Parser, _parse_join)
    _override(Parser, _parse_order)
    _override(Parser, _parse_where)
    _override(Parser, _parse_group)
    _override(Parser, _parse_with)
    _override(Parser, _parse_having)
    _override(Parser, _parse_limit)
    _override(Parser, _parse_lambda)
    _override(Parser, _parse_types)
    _override(Parser, _parse_if)
    _override(Parser, _parse_id_var)
    _override(Snowflake.Parser, _parse_table_parts)
Extend SQLGlot with SQLMesh's custom macro-aware dialect.
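The effect, sketched below, is that the stock sqlglot parser understands SQLMesh constructs such as @-macros. This is assumed to happen once at startup (importing the sqlmesh package is expected to apply it; calling it a second time is not safe, since _override would then wrap the already-patched methods):

    import sqlmesh  # noqa: F401 -- assumed to apply extend_sqlglot() on import

    from sqlglot import parse_one as sqlglot_parse_one
    from sqlmesh.core.dialect import MacroVar

    expr = sqlglot_parse_one("SELECT @start_ds AS ds")
    assert isinstance(expr.selects[0].this, MacroVar)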
def select_from_values(
    values: t.List[PandasNamedTuple],
    columns_to_types: t.Dict[str, exp.DataType],
    batch_size: int = 0,
    alias: str = "t",
) -> t.Iterator[exp.Select]:
    """Generate a VALUES expression that has a select wrapped around it to cast the values to their correct types.

    Args:
        values: List of values to use for the VALUES expression.
        columns_to_types: Mapping of column names to types to assign to the values.
        batch_size: The maximum number of tuples per batch. Defaults to sys.maxsize if <= 0.
        alias: The alias to assign to the values expression. Defaults to "t".

    Returns:
        This method operates as a generator and yields a VALUES expression.
    """
    if batch_size <= 0:
        batch_size = sys.maxsize
    num_rows = len(values)
    for i in range(0, num_rows, batch_size):
        yield select_from_values_for_batch_range(
            values=values,
            columns_to_types=columns_to_types,
            batch_start=i,
            batch_end=min(i + batch_size, num_rows),
            alias=alias,
        )
Generate a VALUES expression that has a select wrapped around it to cast the values to their correct types.
Arguments:
- values: List of values to use for the VALUES expression.
- columns_to_types: Mapping of column names to types to assign to the values.
- batch_size: The maximum number of tuples per batch. Defaults to sys.maxsize if <= 0.
- alias: The alias to assign to the values expression. Defaults to "t".
Returns:
This method operates as a generator and yields a VALUES expression.
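A minimal sketch (column names and types are made up):

    from sqlglot import exp
    from sqlmesh.core.dialect import select_from_values

    columns_to_types = {
        "id": exp.DataType.build("int"),
        "name": exp.DataType.build("text"),
    }
    for select in select_from_values([(1, "a"), (2, "b")], columns_to_types, batch_size=1):
        print(select.sql())
    # Each batch renders roughly as:
    # SELECT CAST(id AS INT) AS id, CAST(name AS TEXT) AS name
    # FROM (VALUES (1, 'a')) AS t(id, name)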
def select_from_values_for_batch_range(
    values: t.List[PandasNamedTuple],
    columns_to_types: t.Dict[str, exp.DataType],
    batch_start: int,
    batch_end: int,
    alias: str = "t",
) -> exp.Select:
    casted_columns = [
        exp.alias_(exp.cast(column, to=kind), column, copy=False)
        for column, kind in columns_to_types.items()
    ]

    if not values:
        # Ensures we don't generate an empty VALUES clause & forces a zero-row output
        where = exp.false()
        expressions = [tuple(exp.cast(exp.null(), to=kind) for kind in columns_to_types.values())]
    else:
        where = None
        expressions = [
            tuple(transform_values(v, columns_to_types)) for v in values[batch_start:batch_end]
        ]

    values_exp = exp.values(expressions, alias=alias, columns=columns_to_types)
    return exp.select(*casted_columns).from_(values_exp, copy=False).where(where, copy=False)
def pandas_to_sql(
    df: pd.DataFrame,
    columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
    batch_size: int = 0,
    alias: str = "t",
) -> t.Iterator[exp.Select]:
    """Convert a pandas dataframe into a VALUES sql statement.

    Args:
        df: A pandas dataframe to convert.
        columns_to_types: Mapping of column names to types to assign to the values.
        batch_size: The maximum number of tuples per batch. Defaults to sys.maxsize if <= 0.
        alias: The alias to assign to the values expression. Defaults to "t".

    Returns:
        This method operates as a generator and yields a VALUES expression.
    """
    yield from select_from_values(
        values=list(df.itertuples(index=False, name=None)),
        columns_to_types=columns_to_types or columns_to_types_from_df(df),
        batch_size=batch_size,
        alias=alias,
    )
Convert a pandas dataframe into a VALUES sql statement.
Arguments:
- df: A pandas dataframe to convert.
- columns_to_types: Mapping of column names to types to assign to the values.
- batch_size: The maximum number of tuples per batch. Defaults to sys.maxsize if <= 0.
- alias: The alias to assign to the values expression. Defaults to "t".
Returns:
This method operates as a generator and yields a VALUES expression.
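A short sketch (the dataframe is made up; the inferred types come from columns_to_types_from_df and may differ by pandas dtype):

    import pandas as pd
    from sqlmesh.core.dialect import pandas_to_sql

    df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
    for select in pandas_to_sql(df):
        print(select.sql())
    # Roughly: SELECT CAST(id AS BIGINT) AS id, CAST(name AS TEXT) AS name
    # FROM (VALUES (1, 'a'), (2, 'b')) AS t(id, name)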
def normalize_model_name(
    table: str | exp.Table | exp.Column,
    default_catalog: t.Optional[str],
    dialect: DialectType = None,
) -> str:
    if isinstance(table, exp.Column):
        table = exp.table_(table.this, db=table.args.get("table"), catalog=table.args.get("db"))
    else:
        # We are relying on sqlglot's flexible parsing here to accept quotes from other dialects.
        # Ex: we have a normalized name of '"my_table"' but the dialect is Spark, whose quote
        # character is the backtick ('`') rather than the double quote ('"'). sqlglot is flexible
        # and will still parse this correctly, and we rely on that.
        table = exp.to_table(table, dialect=dialect)

    table = set_default_catalog(table, default_catalog)
    # An alternative way to do this is the following: exp.table_name(table, dialect=dialect, identify=True)
    # That, however, would normalize both the names AND the quotes to the target dialect, while the
    # approach below normalizes just the names.
    # By normalizing only the names and using the sqlglot dialect for quotes, dialects that have
    # compatible normalization strategies but incompatible quoting can still work together without user hassle.
    return exp.table_name(normalize_identifiers(table, dialect=dialect), identify=True)
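A sketch of the behavior (catalog and table names are made up; expected outputs shown as comments):

    from sqlmesh.core.dialect import normalize_model_name

    normalize_model_name("db.orders", default_catalog="prod", dialect="snowflake")
    # '"PROD"."DB"."ORDERS"' -- Snowflake upper-cases unquoted identifiers

    normalize_model_name('"db".orders', default_catalog="prod", dialect="snowflake")
    # '"PROD"."db"."ORDERS"' -- quoted identifiers keep their case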
def find_tables(
    expression: exp.Expression, default_catalog: t.Optional[str], dialect: DialectType = None
) -> t.Set[str]:
    """Find all tables referenced in a query.

    Caches the result in the meta field 'tables'.

    Args:
        expression: The query to find the tables in.
        dialect: The dialect to use for normalization of table names.

    Returns:
        A set of all the table names.
    """
    if TABLES_META not in expression.meta:
        expression.meta[TABLES_META] = {
            normalize_model_name(table, default_catalog=default_catalog, dialect=dialect)
            for scope in traverse_scope(expression)
            for table in scope.tables
            if table.name and table.name not in scope.cte_sources
        }
    return expression.meta[TABLES_META]
Find all tables referenced in a query.
Caches the result in the meta field 'tables'.
Arguments:
- expression: The query to find the tables in.
- dialect: The dialect to use for normalization of table names.
Returns:
A set of all the table names.
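A small sketch (the schema and table names are made up; expected output shown as a comment):

    from sqlglot import parse_one as sqlglot_parse_one
    from sqlmesh.core.dialect import find_tables

    query = sqlglot_parse_one("SELECT * FROM raw.orders JOIN raw.customers USING (id)")
    find_tables(query, default_catalog="prod", dialect="duckdb")
    # {'"prod"."raw"."orders"', '"prod"."raw"."customers"'}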
def add_table(node: exp.Expression, table: str) -> exp.Expression:
    """Add a table to all columns in an expression."""

    def _transform(node: exp.Expression) -> exp.Expression:
        if isinstance(node, exp.Column) and not node.table:
            return exp.column(node.this, table=table)
        if isinstance(node, exp.Identifier):
            return exp.column(node, table=table)
        return node

    return node.transform(_transform)
Add a table to all columns in an expression.
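For example (the condition is made up):

    from sqlglot import parse_one as sqlglot_parse_one
    from sqlmesh.core.dialect import add_table

    condition = sqlglot_parse_one("id = 1 AND ds BETWEEN '2024-01-01' AND '2024-01-02'")
    add_table(condition, "t").sql()
    # "t.id = 1 AND t.ds BETWEEN '2024-01-01' AND '2024-01-02'"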
def transform_values(
    values: t.Tuple[t.Any, ...], columns_to_types: t.Dict[str, exp.DataType]
) -> t.Iterator[t.Any]:
    """Perform transformations on values given columns_to_types."""
    for value, col_type in zip(values, columns_to_types.values()):
        if col_type.is_type(exp.DataType.Type.JSON):
            yield exp.func("PARSE_JSON", f"'{value}'")
        elif isinstance(value, dict) and col_type.is_type(*exp.DataType.STRUCT_TYPES):
            yield _dict_to_struct(value)
        else:
            yield value
Perform transformations on values given columns_to_types.
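A brief sketch (the column names and values are made up): JSON values are wrapped in a PARSE_JSON call, dicts destined for struct columns become exp.Struct nodes, and everything else passes through unchanged.

    from sqlglot import exp
    from sqlmesh.core.dialect import transform_values

    columns_to_types = {
        "payload": exp.DataType.build("json"),
        "qty": exp.DataType.build("int"),
    }
    out = list(transform_values(('{"a": 1}', 3), columns_to_types))
    out[0].sql()  # roughly: PARSE_JSON('{"a": 1}')
    out[1]        # 3, passed through untouched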
def to_schema(sql_path: str | exp.Table) -> exp.Table:
    if isinstance(sql_path, exp.Table) and sql_path.this is None:
        return sql_path
    table = exp.to_table(sql_path.copy() if isinstance(sql_path, exp.Table) else sql_path)
    table.set("catalog", table.args.get("db"))
    table.set("db", table.args.get("this"))
    table.set("this", None)
    return table
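to_schema reinterprets a table path as a schema path by shifting each part up one level; a small sketch (the names are made up):

    from sqlmesh.core.dialect import to_schema

    schema = to_schema("my_catalog.my_schema")
    schema.sql()  # 'my_catalog.my_schema'
    assert schema.this is None  # no table part: this names a schema, not a table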
def schema_(
    db: exp.Identifier | str,
    catalog: t.Optional[exp.Identifier | str] = None,
    quoted: t.Optional[bool] = None,
) -> exp.Table:
    """Build a Schema.

    Args:
        db: Database name.
        catalog: Catalog name.
        quoted: Whether to force quotes on the schema's identifiers.

    Returns:
        The new Schema instance.
    """
    return exp.Table(
        this=None,
        db=exp.to_identifier(db, quoted=quoted) if db else None,
        catalog=exp.to_identifier(catalog, quoted=quoted) if catalog else None,
    )
Build a Schema.
Arguments:
- db: Database name.
- catalog: Catalog name.
- quoted: Whether to force quotes on the schema's identifiers.
Returns:
The new Schema instance.
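For example (the names are made up; expected renderings shown as comments):

    from sqlmesh.core.dialect import schema_

    schema_("analytics", catalog="prod").sql()               # 'prod.analytics'
    schema_("analytics", catalog="prod", quoted=True).sql()  # '"prod"."analytics"'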
@contextmanager
def normalize_and_quote(
    query: E, dialect: str, default_catalog: t.Optional[str], quote: bool = True
) -> t.Iterator[E]:
    qualify_tables(query, catalog=default_catalog, dialect=dialect)
    normalize_identifiers(query, dialect=dialect)
    yield query
    if quote:
        quote_identifiers(query, dialect=dialect)
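A minimal sketch (the query and catalog are made up): the caller can mutate the query while its identifiers are qualified and normalized, and quoting is applied in place when the block exits.

    from sqlglot import parse_one as sqlglot_parse_one
    from sqlmesh.core.dialect import normalize_and_quote

    query = sqlglot_parse_one("SELECT id FROM sales.orders")
    with normalize_and_quote(query, "snowflake", "prod") as q:
        # Here tables are qualified with the default catalog and identifiers are
        # normalized (upper-cased for Snowflake) but not yet quoted.
        pass
    # On exit the identifiers are quoted in place, e.g. "PROD"."SALES"."ORDERS".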