Edit on GitHub
sqlmesh.core.engine_adapter.clickhouse

View Source
  1from __future__ import annotations
  2
  3import typing as t
  4import logging
  5import re
  6from sqlglot import exp, maybe_parse
  7from sqlmesh.core.dialect import to_schema
  8from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin
  9from sqlmesh.core.engine_adapter.base import EngineAdapterWithIndexSupport
 10from sqlmesh.core.engine_adapter.shared import (
 11    DataObject,
 12    DataObjectType,
 13    EngineRunMode,
 14    SourceQuery,
 15    CommentCreationView,
 16    InsertOverwriteStrategy,
 17)
 18from sqlmesh.core.schema_diff import TableAlterOperation
 19from sqlmesh.utils import get_source_columns_to_types
 20
 21if t.TYPE_CHECKING:
 22    import pandas as pd
 23
 24    from sqlmesh.core._typing import SchemaName, TableName
 25    from sqlmesh.core.engine_adapter._typing import DF, Query, QueryOrDF
 26
 27    from sqlmesh.core.node import IntervalUnit
 28
 29
 30logger = logging.getLogger(__name__)
 31
 32
 33class ClickhouseEngineAdapter(EngineAdapterWithIndexSupport, LogicalMergeMixin):
    # SQLGlot dialect name used when rendering SQL for this adapter.
    DIALECT = "clickhouse"
    # Capability flags for this engine.
    # NOTE(review): these are presumably read by the shared base adapter; the consumers
    #   are not visible in this file.
    SUPPORTS_TRANSACTIONS = False
    SUPPORTS_VIEW_SCHEMA = False
    SUPPORTS_REPLACE_TABLE = False
    COMMENT_CREATION_VIEW = CommentCreationView.COMMENT_COMMAND_ONLY

    SCHEMA_DIFFER_KWARGS = {}

    # Table engine used by `_build_table_properties_exp` when no `storage_format` is passed.
    DEFAULT_TABLE_ENGINE = "MergeTree"
    # `_build_table_properties_exp` treats any engine whose name matches this pattern as a
    #   MergeTree-family engine when deciding how to handle the ORDER BY property.
    ORDER_BY_TABLE_ENGINE_REGEX = "^.*?MergeTree.*$"
 44
 45    @property
 46    def engine_run_mode(self) -> EngineRunMode:
 47        if self._extra_config.get("cloud_mode"):
 48            return EngineRunMode.CLOUD
 49        # we use the user's specification of a cluster in the connection config to determine if
 50        #   the engine is in cluster mode
 51        if self._extra_config.get("cluster"):
 52            return EngineRunMode.CLUSTER
 53        return EngineRunMode.STANDALONE
 54
 55    @property
 56    def cluster(self) -> t.Optional[str]:
 57        return self._extra_config.get("cluster")
 58
 59    # Workaround for clickhouse-connect cursor bug
 60    # - cursor does not reset row index correctly on `close()`, so `fetchone()` and `fetchmany()`
 61    #     return the wrong (or no) rows after the very first cursor query that returns rows
 62    #     in the connection
 63    # - cursor does reset the data rows correctly on `close()`, so `fetchall()` works because it
 64    #     doesn't use the row index at all
 65    def fetchone(
 66        self,
 67        query: t.Union[exp.Expression, str],
 68        ignore_unsupported_errors: bool = False,
 69        quote_identifiers: bool = False,
 70    ) -> t.Tuple:
 71        with self.transaction():
 72            self.execute(
 73                query,
 74                ignore_unsupported_errors=ignore_unsupported_errors,
 75                quote_identifiers=quote_identifiers,
 76            )
 77            return self.cursor.fetchall()[0]
 78
 79    def _fetch_native_df(
 80        self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
 81    ) -> pd.DataFrame:
 82        """Fetches a Pandas DataFrame from the cursor"""
 83        return self.cursor.client.query_df(
 84            self._to_sql(query, quote=quote_identifiers)
 85            if isinstance(query, exp.Expression)
 86            else query,
 87            use_extended_dtypes=True,
 88        )
 89
 90    def _df_to_source_queries(
 91        self,
 92        df: DF,
 93        target_columns_to_types: t.Dict[str, exp.DataType],
 94        batch_size: int,
 95        target_table: TableName,
 96        source_columns: t.Optional[t.List[str]] = None,
 97        **kwargs: t.Any,
 98    ) -> t.List[SourceQuery]:
 99        temp_table = self._get_temp_table(target_table, **kwargs)
100        source_columns_to_types = get_source_columns_to_types(
101            target_columns_to_types, source_columns
102        )
103
104        def query_factory() -> Query:
105            # It is possible for the factory to be called multiple times and if so then the temp table will already
106            # be created so we skip creating again. This means we are assuming the first call is the same result
107            # as later calls.
108            if not self.table_exists(temp_table):
109                self.create_table(
110                    temp_table,
111                    source_columns_to_types,
112                    storage_format=exp.var("MergeTree"),
113                    **kwargs,
114                )
115                ordered_df = df[list(source_columns_to_types)]
116
117                self.cursor.client.insert_df(temp_table.sql(dialect=self.dialect), df=ordered_df)
118
119            return exp.select(*self._casted_columns(target_columns_to_types, source_columns)).from_(
120                temp_table
121            )
122
123        return [
124            SourceQuery(
125                query_factory=query_factory,
126                cleanup_func=lambda: self.drop_table(temp_table, **kwargs),
127            )
128        ]
129
130    def _get_data_objects(
131        self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None
132    ) -> t.List[DataObject]:
133        """
134        Returns all the data objects that exist in the given database.
135        """
136        query = (
137            exp.select(
138                exp.column("database").as_("schema_name"),
139                exp.column("name"),
140                exp.case(exp.column("engine"))
141                .when(
142                    exp.Literal.string("View"),
143                    exp.Literal.string("view"),
144                )
145                .else_(
146                    exp.Literal.string("table"),
147                )
148                .as_("type"),
149            )
150            .from_("system.tables")
151            .where(exp.column("database").eq(to_schema(schema_name).db))
152        )
153        if object_names:
154            query = query.where(exp.column("name").isin(*object_names))
155        df = self.fetchdf(query)
156        return [
157            DataObject(
158                catalog=None,
159                schema=row.schema_name,
160                name=row.name,
161                type=DataObjectType.from_str(row.type),  # type: ignore
162            )
163            for row in df.itertuples()
164        ]
165
166    def create_schema(
167        self,
168        schema_name: SchemaName,
169        ignore_if_exists: bool = True,
170        warn_on_error: bool = True,
171        properties: t.List[exp.Expression] = [],
172    ) -> None:
173        """Create a Clickhouse database from a name or qualified table name.
174
175        Clickhouse has a two-level naming scheme [database].[table].
176        """
177        properties_copy = properties.copy()
178        if self.engine_run_mode.is_cluster:
179            properties_copy.append(exp.OnCluster(this=exp.to_identifier(self.cluster)))
180
181        # can't call super() because it will try to set a catalog
182        return self._create_schema(
183            schema_name=schema_name,
184            ignore_if_exists=ignore_if_exists,
185            warn_on_error=warn_on_error,
186            properties=properties_copy,
187            # sqlglot transpiles CREATE SCHEMA to CREATE DATABASE, but this text is used in an error message
188            kind="DATABASE",
189        )
190
191    def _insert_overwrite_by_condition(
192        self,
193        table_name: TableName,
194        source_queries: t.List[SourceQuery],
195        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
196        where: t.Optional[exp.Condition] = None,
197        insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None,
198        **kwargs: t.Any,
199    ) -> None:
200        """
201        Implements the table or partition swap approach to insert-overwriting records.
202
203        Because this method executes multiple variants (full table replace, replace by time
204        range, replace by key, replace by partition), some upstream caller info is needed and
205        passed via kwargs.
206
207        Args:
208            table_name: Name of target table
209            source_queries: Source queries returning records to insert
210            target_columns_to_types: Column names and data types of target table
211            where: SQLGlot expression determining which target table rows should be overwritten
212            insert_overwrite_strategy_override: Not used by Clickhouse
213            kwargs:
214                dynamic_key: Key columns (replace by key only)
215                dynamic_key_exp: Expression to build key (replace by key only)
216                dynamic_key_unique: Whether more than one record can exist per key value (replace by key only)
217
218                keep_existing_partition_rows: Whether to overwrite partitions with only new records (incremental by partition only)
219
220        Returns:
221            Side effects only: execution of insert-overwrite operation.
222        """
223        target_table = exp.to_table(table_name)
224        target_columns_to_types = target_columns_to_types or self.columns(target_table)
225
226        temp_table = self._get_temp_table(target_table)
227        self.create_table_like(temp_table, target_table)
228
229        # REPLACE BY KEY: extract kwargs if present
230        dynamic_key = kwargs.get("dynamic_key")
231        if dynamic_key:
232            dynamic_key_exp = t.cast(exp.Expression, kwargs.get("dynamic_key_exp"))
233            dynamic_key_unique = t.cast(bool, kwargs.get("dynamic_key_unique"))
234
235        try:
236            # insert new records into temp table
237            for source_query in source_queries:
238                with source_query as query:
239                    # REPLACE BY KEY: if unique key, DISTINCTify by key columns so only one row is present per key
240                    if dynamic_key and dynamic_key_unique:
241                        query = query.distinct(*dynamic_key)  # type: ignore
242
243                    query = self._order_projections_and_filter(
244                        query, target_columns_to_types, where=where
245                    )
246                    self._insert_append_query(
247                        temp_table,
248                        query,
249                        target_columns_to_types=target_columns_to_types,
250                        order_projections=False,
251                    )
252
253            # REPLACE BY KEY: build `where` expression as "key IN (new rows' key values)"
254            if dynamic_key:
255                key_query = exp.select(dynamic_key_exp).from_(temp_table)
256                if not dynamic_key_unique:
257                    key_query = key_query.distinct()
258                where = dynamic_key_exp.isin(query=key_query)
259
260            # get target table partition key to confirm it's actually partitioned
261            table_partition_exp = self.fetchone(
262                exp.select("partition_key")
263                .from_("system.tables")
264                .where(
265                    exp.column("database").eq(target_table.db),
266                    exp.column("name").eq(target_table.name),
267                )
268            )
269
270            all_affected_partitions: t.Set[str] = set()
271
272            if where:
273                # identify existing records to keep by inverting the delete `where` clause
274                existing_records_insert_exp = exp.insert(
275                    self._select_columns(target_columns_to_types)
276                    .from_(target_table)
277                    .where(exp.paren(expression=where).not_()),
278                    temp_table,
279                )
280
281                # if target table is partitioned, modify insert expression to only insert
282                #   existing records that are in one of the affected partitions
283                if table_partition_exp:
284                    partitions_temp_table_name = self._get_temp_table(
285                        exp.to_table(f"{target_table.db}._affected_partitions")
286                    )
287                    all_affected_partitions, existing_records_insert_exp = (
288                        self._get_affected_partitions_and_insert_exp(
289                            target_table,
290                            temp_table,
291                            where,
292                            existing_records_insert_exp,
293                            partitions_temp_table_name,
294                        )
295                    )
296
297                try:
298                    self.execute(existing_records_insert_exp, track_rows_processed=True)
299                finally:
300                    if table_partition_exp:
301                        self.drop_table(partitions_temp_table_name)
302
303            # process by partition if:
304            #   1. The table is partitioned AND
305            #   (2a. There are existing records to keep (`where`) OR
306            #    2b. We're overwriting existing partition rows (incremental by partition model))
307            if table_partition_exp and (
308                where or kwargs.get("keep_existing_partition_rows") is False
309            ):
310                # only replace partitions that have records in temp_table
311                partitions_to_replace = self._get_partition_ids(temp_table)
312
313                # drop affected partitions that have no records in temp_table
314                #   - NOTE: `all_affected_partitions` will be empty when keep_existing_partition_rows=False
315                #      because previous code block is skipped
316                partitions_to_drop = all_affected_partitions - partitions_to_replace
317
318                if partitions_to_replace or partitions_to_drop:
319                    self.alter_table(
320                        [
321                            self._build_alter_partition_exp(
322                                target_table, temp_table, partitions_to_replace, partitions_to_drop
323                            )
324                        ]
325                    )
326            else:
327                self._exchange_tables(target_table, temp_table)
328        finally:
329            self.drop_table(temp_table)
330
331    def _get_affected_partitions_and_insert_exp(
332        self,
333        target_table: exp.Table,
334        temp_table: exp.Table,
335        where: exp.Condition,
336        existing_records_insert_exp: exp.Insert,
337        partitions_temp_table_name: exp.Table,
338    ) -> tuple[t.Set[str], exp.Insert]:
339        # identify all affected partition IDs
340        #   - store in temp table so we can reuse results
341        self.ctas(
342            partitions_temp_table_name,
343            exp.select("partition_id")
344            .distinct()
345            .from_(
346                exp.union(
347                    # target table partitions with records in `where`
348                    exp.select(exp.column("_partition_id").as_("partition_id"))
349                    .from_(target_table)
350                    .where(where),
351                    # temp table partitions with new records to insert
352                    exp.select(
353                        exp.column("_partition_id").as_("partition_id"),
354                    ).from_(temp_table),
355                ).subquery("_affected_partitions")
356            ),
357        )
358
359        # read all affected partition IDs into memory
360        all_affected_partitions = self._get_partition_ids(
361            partitions_temp_table_name, "partition_id"
362        )
363
364        # limit existing records insert expression WHERE to affected target table partitions
365        #   by adding `AND _partition_id IN (SELECT partition_id FROM partitions_temp_table)`
366        existing_records_insert_exp.set(
367            "expression",
368            existing_records_insert_exp.expression.where(
369                exp.column("_partition_id").isin(
370                    exp.select("partition_id").from_(partitions_temp_table_name)
371                )
372            ),
373        )
374
375        return all_affected_partitions, existing_records_insert_exp
376
377    def _build_alter_partition_exp(
378        self,
379        target_table: exp.Table,
380        temp_table: exp.Table,
381        partitions_to_replace: t.Set[str],
382        partitions_to_drop: t.Set[str],
383    ) -> exp.Alter:
384        alter_expr = exp.Alter(this=target_table, kind="TABLE")
385
386        for partition in partitions_to_replace:
387            alter_expr.append(
388                "actions",
389                exp.ReplacePartition(
390                    expression=exp.Partition(
391                        expressions=[exp.PartitionId(this=exp.Literal.string(str(partition)))]
392                    ),
393                    source=temp_table,
394                ),
395            )
396
397        for partition in partitions_to_drop:
398            alter_expr.append(
399                "actions",
400                exp.DropPartition(
401                    expressions=[
402                        exp.Partition(
403                            expressions=[exp.PartitionId(this=exp.Literal.string(str(partition)))]
404                        )
405                    ],
406                    source=temp_table,
407                ),
408            )
409
410        return alter_expr
411
412    def _replace_by_key(
413        self,
414        target_table: TableName,
415        source_table: QueryOrDF,
416        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]],
417        key: t.Sequence[exp.Expression],
418        is_unique_key: bool,
419        source_columns: t.Optional[t.List[str]] = None,
420    ) -> None:
421        source_queries, target_columns_to_types = self._get_source_queries_and_columns_to_types(
422            source_table,
423            target_columns_to_types,
424            target_table=target_table,
425            source_columns=source_columns,
426        )
427
428        key_exp = exp.func("CONCAT_WS", "'__SQLMESH_DELIM__'", *key) if len(key) > 1 else key[0]
429
430        self._insert_overwrite_by_condition(
431            target_table,
432            source_queries,
433            target_columns_to_types,
434            dynamic_key=key,
435            dynamic_key_exp=key_exp,
436            dynamic_key_unique=is_unique_key,
437        )
438
439    def insert_overwrite_by_partition(
440        self,
441        table_name: TableName,
442        query_or_df: QueryOrDF,
443        partitioned_by: t.List[exp.Expression],
444        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
445        source_columns: t.Optional[t.List[str]] = None,
446    ) -> None:
447        source_queries, target_columns_to_types = self._get_source_queries_and_columns_to_types(
448            query_or_df,
449            target_columns_to_types,
450            target_table=table_name,
451            source_columns=source_columns,
452        )
453
454        self._insert_overwrite_by_condition(
455            table_name, source_queries, target_columns_to_types, keep_existing_partition_rows=False
456        )
457
458    def _create_table_like(
459        self,
460        target_table_name: TableName,
461        source_table_name: TableName,
462        exists: bool,
463        **kwargs: t.Any,
464    ) -> None:
465        """Create table with identical structure as source table"""
466        self.execute(
467            f"CREATE TABLE {target_table_name}{self._on_cluster_sql()} AS {source_table_name}"
468        )
469
470    def _get_partition_ids(
471        self,
472        table: exp.Table,
473        partition_col_name: str = "_partition_id",
474        where: t.Optional[exp.Condition] = None,
475        limit: t.Optional[int] = None,
476    ) -> t.Set[t.Any]:
477        """List partition IDs present in table"""
478        partitions_query = exp.select(partition_col_name).distinct().from_(table)
479        if where:
480            partitions_query = partitions_query.where(where)
481        if limit:
482            partitions_query = partitions_query.limit(limit)
483        partitions = self.fetchall(partitions_query)
484
485        return set([part[0] for part in partitions] if partitions else [])
486
    def _create_table(
        self,
        table_name_or_schema: t.Union[exp.Schema, TableName],
        expression: t.Optional[exp.Expression],
        exists: bool = True,
        replace: bool = False,
        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
        table_description: t.Optional[str] = None,
        column_descriptions: t.Optional[t.Dict[str, str]] = None,
        table_kind: t.Optional[str] = None,
        track_rows_processed: bool = True,
        **kwargs: t.Any,
    ) -> None:
        """Creates a table in the database.

        Clickhouse Cloud requires doing CTAS in two steps.

        First, we add the `EMPTY` property to the CTAS call to create a table with the proper
        schema, then insert the data with the CTAS query.

        Args:
            table_name_or_schema: Table name, or a schema expression (name plus column defs).
            expression: Query for a CTAS; `None` for a plain CREATE TABLE.
            exists: Forwarded to the parent implementation.
            replace: Forwarded to the parent implementation.
            target_columns_to_types: Column names and types. When `partitioned_by` is given,
                the `DataType` objects of the partition columns are modified in place.
            table_description: Forwarded to the parent implementation.
            column_descriptions: Forwarded to the parent implementation.
            table_kind: Forwarded to the parent implementation; `"VIEW"` suppresses the
                second (INSERT) step in cloud mode.
            track_rows_processed: Forwarded to the parent implementation.
            kwargs: Forwarded to the parent implementation. `partitioned_by` is also read
                here to find the partition columns.
        """
        # ensure columns used for partitioning are non-Nullable
        #   - normally user's responsibility, but we automatically partition by time column in
        #       incremental by time models
        if kwargs.get("partitioned_by"):
            # names of every column referenced anywhere inside the partition expressions
            partition_cols = [
                col.name
                for part_expr in kwargs["partitioned_by"]
                for col in part_expr.find_all(exp.Column)
            ]
            if isinstance(table_name_or_schema, exp.Schema):
                for coldef in table_name_or_schema.expressions:
                    if coldef.name in partition_cols:
                        coldef.kind.set("nullable", False)
            if target_columns_to_types:
                # NOTE(review): raises KeyError if a partition column is missing from
                #   `target_columns_to_types` - presumably callers always include it; confirm
                for col in partition_cols:
                    target_columns_to_types[col].set("nullable", False)

        super()._create_table(
            table_name_or_schema,
            expression,
            exists,
            replace,
            target_columns_to_types,
            table_description,
            column_descriptions,
            table_kind,
            # in cloud mode a CTAS only creates the (empty) table; data is inserted below
            empty_ctas=(self.engine_run_mode.is_cloud and expression is not None),
            track_rows_processed=track_rows_processed,
            **kwargs,
        )

        # execute the second INSERT step if on cloud and creating a table
        # - Additional clause is to avoid clickhouse-connect HTTP client bug where CTAS LIMIT 0
        #     returns a success code but malformed response
        if (
            self.engine_run_mode.is_cloud
            and table_kind != "VIEW"
            and expression
            and not (
                expression.args.get("limit") is not None
                and expression.args["limit"].expression.this == "0"
            )
        ):
            # a schema expression wraps the table name in `this`
            table_name = (
                table_name_or_schema.this
                if isinstance(table_name_or_schema, exp.Schema)
                else table_name_or_schema
            )
            self._insert_append_query(
                table_name,
                expression,  # type: ignore
                target_columns_to_types or self.columns(table_name),
            )
560
561    def _exchange_tables(
562        self,
563        old_table_name: TableName,
564        new_table_name: TableName,
565    ) -> None:
566        from clickhouse_connect.driver.exceptions import DatabaseError  # type: ignore
567
568        old_table_sql = exp.to_table(old_table_name).sql(dialect=self.dialect, identify=True)
569        new_table_sql = exp.to_table(new_table_name).sql(dialect=self.dialect, identify=True)
570
571        try:
572            self.execute(
573                f"EXCHANGE TABLES {old_table_sql} AND {new_table_sql}{self._on_cluster_sql()}"
574            )
575        except DatabaseError as e:
576            if "NOT_IMPLEMENTED" in str(e):
577                # If someone is using an old Clickhouse version, an OS that doesn't support atomic exchanges,
578                # or a database engine that doesn't support atomic exchanges, we do a non-atomic rename instead.
579                #
580                # Executing multiple renames in one call like `RENAME TABLE a to b, c to a` is supported
581                # but not an atomic operation. Because it is not atomic, doing it in two calls is equivalent
582                # and does not require defining an additional method.
583                throwaway_table_name = self._get_temp_table(old_table_name)
584                self._rename_table(old_table_name, throwaway_table_name)
585                self._rename_table(new_table_name, old_table_name)
586                self.drop_table(throwaway_table_name)
587
588    def _rename_table(
589        self,
590        old_table_name: TableName,
591        new_table_name: TableName,
592    ) -> None:
593        old_table_sql = exp.to_table(old_table_name).sql(dialect=self.dialect, identify=True)
594        new_table_sql = exp.to_table(new_table_name).sql(dialect=self.dialect, identify=True)
595
596        self.execute(f"RENAME TABLE {old_table_sql} TO {new_table_sql}{self._on_cluster_sql()}")
597
598    def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None:
599        delete_expr = exp.delete(table_name, where)
600        if self.engine_run_mode.is_cluster:
601            delete_expr.set("cluster", exp.OnCluster(this=exp.to_identifier(self.cluster)))
602        self.execute(delete_expr)
603
604    def alter_table(
605        self,
606        alter_expressions: t.Union[t.List[exp.Alter], t.List[TableAlterOperation]],
607    ) -> None:
608        """
609        Performs the alter statements to change the current table into the structure of the target table.
610        """
611        with self.transaction():
612            for alter_expression in [
613                x.expression if isinstance(x, TableAlterOperation) else x for x in alter_expressions
614            ]:
615                if self.engine_run_mode.is_cluster:
616                    alter_expression.set(
617                        "cluster", exp.OnCluster(this=exp.to_identifier(self.cluster))
618                    )
619                self.execute(alter_expression)
620
621    def _drop_object(
622        self,
623        name: TableName | SchemaName,
624        exists: bool = True,
625        kind: str = "TABLE",
626        cascade: bool = False,
627        **drop_args: t.Any,
628    ) -> None:
629        """Drops an object.
630
631        An object could be a DATABASE, SCHEMA, VIEW, TABLE, DYNAMIC TABLE, TEMPORARY TABLE etc depending on the :kind.
632
633        Args:
634            name: The name of the table to drop.
635            exists: If exists, defaults to True.
636            kind: What kind of object to drop. Defaults to TABLE
637            **drop_args: Any extra arguments to set on the Drop expression
638        """
639        super()._drop_object(
640            name=name,
641            exists=exists,
642            kind=kind,
643            cascade=cascade,
644            cluster=exp.OnCluster(this=exp.to_identifier(self.cluster))
645            if self.engine_run_mode.is_cluster
646            else None,
647            **drop_args,
648        )
649
650    def _build_partitioned_by_exp(
651        self,
652        partitioned_by: t.List[exp.Expression],
653        **kwargs: t.Any,
654    ) -> t.Optional[t.Union[exp.PartitionedByProperty, exp.Property]]:
655        return exp.PartitionedByProperty(
656            this=exp.Schema(expressions=partitioned_by),
657        )
658
659    def ensure_nulls_for_unmatched_after_join(
660        self,
661        query: Query,
662    ) -> Query:
663        # Set `join_use_nulls = 1` in a query's SETTINGS clause
664        query.append("settings", exp.var("join_use_nulls").eq(exp.Literal.number("1")))
665        return query
666
667    def use_server_nulls_for_unmatched_after_join(
668        self,
669        query: Query,
670    ) -> Query:
671        # Set the `join_use_nulls` server value in a query's SETTINGS clause
672        #
673        # Use in SCD models:
674        #  - The SCD query we build must include the setting `join_use_nulls = 1` to ensure that empty cells in a join
675        #      are filled with NULL instead of the default data type value. The default join_use_nulls value is `0`.
676        #  - The SCD embeds the user's original query in the `source` CTE
677        #  - Settings are dynamically scoped, so our setting may override the server's default setting the user expects
678        #      for their query.
679        #  - To prevent this, we:
680        #     - If the user query sets `join_use_nulls`, we do nothing
681        #     - If the user query does not set `join_use_nulls`, we query the server for the current setting
682        #       - If the server value is 1, we do nothing
683        #       - If the server values is not 1, we inject its `join_use_nulls` value into the user query
684        #     - We do not need to check user subqueries because our injected setting operates at the same scope the
685        #         server value would normally operate at
686        setting_name = "join_use_nulls"
687        setting_value = "1"
688
689        user_settings = query.args.get("settings")
690        # if user has not already set it explicitly
691        if not (
692            user_settings
693            and any(
694                [
695                    isinstance(setting, exp.EQ) and setting.name == setting_name
696                    for setting in user_settings
697                ]
698            )
699        ):
700            server_value = self.fetchone(
701                exp.select("value")
702                .from_("system.settings")
703                .where(exp.column("name").eq(exp.Literal.string(setting_name)))
704            )[0]
705            # only inject the setting if the server value isn't 1
706            inject_setting = setting_value != server_value
707            setting_value = server_value if inject_setting else setting_value
708
709            if inject_setting:
710                query.append(
711                    "settings", exp.var(setting_name).eq(exp.Literal.number(setting_value))
712                )
713
714        return query
715
716    def _build_settings_property(
717        self, key: str, value: exp.Expression | str | int | float
718    ) -> exp.SettingsProperty:
719        return exp.SettingsProperty(
720            expressions=[
721                exp.EQ(
722                    this=exp.var(key.lower()),
723                    expression=value
724                    if isinstance(value, exp.Expression)
725                    else exp.Literal(this=value, is_string=isinstance(value, str)),
726                )
727            ]
728        )
729
730    def _build_table_properties_exp(
731        self,
732        catalog_name: t.Optional[str] = None,
733        table_format: t.Optional[str] = None,
734        storage_format: t.Optional[str] = None,
735        partitioned_by: t.Optional[t.List[exp.Expression]] = None,
736        partition_interval_unit: t.Optional[IntervalUnit] = None,
737        clustered_by: t.Optional[t.List[exp.Expression]] = None,
738        table_properties: t.Optional[t.Dict[str, exp.Expression]] = None,
739        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
740        table_description: t.Optional[str] = None,
741        table_kind: t.Optional[str] = None,
742        empty_ctas: bool = False,
743        **kwargs: t.Any,
744    ) -> t.Optional[exp.Properties]:
745        properties: t.List[exp.Expression] = []
746
747        table_engine = self.DEFAULT_TABLE_ENGINE
748        if storage_format:
749            table_engine = (
750                storage_format.this if isinstance(storage_format, exp.Var) else storage_format  # type: ignore
751            )
752        properties.append(exp.EngineProperty(this=table_engine))
753
754        # copy of table_properties so we can pop items off below then consume the rest later
755        table_properties_copy = {
756            k.upper(): v for k, v in (table_properties.copy() if table_properties else {}).items()
757        }
758
759        mergetree_engine = bool(re.search(self.ORDER_BY_TABLE_ENGINE_REGEX, table_engine))
760        ordered_by_raw = table_properties_copy.pop("ORDER_BY", None)
761        if mergetree_engine:
762            ordered_by_exprs = []
763            if ordered_by_raw:
764                ordered_by_vals = []
765
766                if isinstance(ordered_by_raw, (exp.Tuple, exp.Array)):
767                    ordered_by_vals = ordered_by_raw.expressions
768                if isinstance(ordered_by_raw, exp.Paren):
769                    ordered_by_vals = [ordered_by_raw.this]
770
771                if not ordered_by_vals:
772                    ordered_by_vals = (
773                        ordered_by_raw if isinstance(ordered_by_raw, list) else [ordered_by_raw]
774                    )
775
776                for col in ordered_by_vals:
777                    ordered_by_exprs.append(
778                        col
779                        if isinstance(col, exp.Column)
780                        else maybe_parse(
781                            col.name if isinstance(col, exp.Literal) else col,
782                            dialect=self.dialect,
783                            into=exp.Ordered,
784                        )
785                    )
786
787            properties.append(exp.Order(expressions=[exp.Tuple(expressions=ordered_by_exprs)]))
788
789        primary_key = table_properties_copy.pop("PRIMARY_KEY", None)
790        if mergetree_engine and primary_key:
791            primary_key_vals = []
792            if isinstance(primary_key, (exp.Tuple, exp.Array)):
793                primary_key_vals = primary_key.expressions
794            if isinstance(ordered_by_raw, exp.Paren):
795                primary_key_vals = [primary_key.this]
796
797            if not primary_key_vals:
798                primary_key_vals = primary_key if isinstance(primary_key, list) else [primary_key]
799
800            properties.append(
801                exp.PrimaryKey(
802                    expressions=[
803                        exp.to_column(k.name if isinstance(k, exp.Literal) else k)
804                        for k in primary_key_vals
805                    ]
806                )
807            )
808
809        ttl = table_properties_copy.pop("TTL", None)
810        if ttl:
811            properties.append(
812                exp.MergeTreeTTL(
813                    expressions=[ttl if isinstance(ttl, exp.Expression) else exp.var(ttl)]
814                )
815            )
816
817        if (
818            partitioned_by
819            and (partitioned_by_prop := self._build_partitioned_by_exp(partitioned_by)) is not None
820        ):
821            properties.append(partitioned_by_prop)
822
823        if self.engine_run_mode.is_cluster:
824            properties.append(exp.OnCluster(this=exp.to_identifier(self.cluster)))
825
826        if empty_ctas:
827            properties.append(exp.EmptyProperty())
828
829        if table_properties_copy:
830            properties.extend(
831                [self._build_settings_property(k, v) for k, v in table_properties_copy.items()]
832            )
833
834        if table_description:
835            properties.append(
836                exp.SchemaCommentProperty(
837                    this=exp.Literal.string(self._truncate_table_comment(table_description))
838                )
839            )
840
841        if properties:
842            return exp.Properties(expressions=properties)
843
844        return None
845
846    def _build_view_properties_exp(
847        self,
848        view_properties: t.Optional[t.Dict[str, exp.Expression]] = None,
849        table_description: t.Optional[str] = None,
850        **kwargs: t.Any,
851    ) -> t.Optional[exp.Properties]:
852        """Creates a SQLGlot table properties expression for view"""
853        properties: t.List[exp.Expression] = []
854
855        view_properties_copy = view_properties.copy() if view_properties else {}
856
857        if self.engine_run_mode.is_cluster:
858            properties.append(exp.OnCluster(this=exp.to_identifier(self.cluster)))
859
860        if view_properties_copy:
861            properties.extend(
862                [self._build_settings_property(k, v) for k, v in view_properties_copy.items()]
863            )
864
865        if table_description:
866            properties.append(
867                exp.SchemaCommentProperty(
868                    this=exp.Literal.string(self._truncate_table_comment(table_description))
869                )
870            )
871
872        if properties:
873            return exp.Properties(expressions=properties)
874        return None
875
876    def _build_create_comment_table_exp(
877        self, table: exp.Table, table_comment: str, table_kind: str, **kwargs: t.Any
878    ) -> exp.Comment | str:
879        table_sql = table.sql(dialect=self.dialect, identify=True)
880
881        truncated_comment = self._truncate_table_comment(table_comment)
882        comment_sql = exp.Literal.string(truncated_comment).sql(dialect=self.dialect)
883
884        return f"ALTER TABLE {table_sql}{self._on_cluster_sql()} MODIFY COMMENT {comment_sql}"
885
886    def _build_create_comment_column_exp(
887        self,
888        table: exp.Table,
889        column_name: str,
890        column_comment: str,
891        table_kind: str = "TABLE",
892        **kwargs: t.Any,
893    ) -> exp.Comment | str:
894        table_sql = table.sql(dialect=self.dialect, identify=True)
895        column_sql = exp.to_column(column_name).sql(dialect=self.dialect, identify=True)
896
897        truncated_comment = self._truncate_table_comment(column_comment)
898        comment_sql = exp.Literal.string(truncated_comment).sql(dialect=self.dialect)
899
900        return f"ALTER TABLE {table_sql}{self._on_cluster_sql()} COMMENT COLUMN {column_sql} {comment_sql}"
901
902    def _on_cluster_sql(self) -> str:
903        if self.engine_run_mode.is_cluster:
904            cluster_name = exp.to_identifier(self.cluster, quoted=True).sql(dialect=self.dialect)  #  type: ignore
905            return f" ON CLUSTER {cluster_name} "
906        return ""
logger = <Logger sqlmesh.core.engine_adapter.clickhouse (WARNING)>
sqlmesh.core.engine_adapter.clickhouse

Arguments:

Inherited Members