Edit on GitHub
sqlmesh.core.engine_adapter.clickhouse

View Source
  1from __future__ import annotations
  2
  3import typing as t
  4import logging
  5import re
  6from sqlglot import exp, maybe_parse
  7from sqlmesh.core.dialect import to_schema
  8from sqlmesh.core.engine_adapter.mixins import LogicalMergeMixin
  9from sqlmesh.core.engine_adapter.base import EngineAdapterWithIndexSupport
 10from sqlmesh.core.engine_adapter.shared import (
 11    CatalogSupport,
 12    DataObject,
 13    DataObjectType,
 14    EngineRunMode,
 15    SourceQuery,
 16    CommentCreationView,
 17    InsertOverwriteStrategy,
 18)
 19from sqlmesh.core.schema_diff import TableAlterOperation
 20from sqlmesh.utils import get_source_columns_to_types
 21
 22if t.TYPE_CHECKING:
 23    import pandas as pd
 24
 25    from sqlmesh.core._typing import SchemaName, TableName
 26    from sqlmesh.core.engine_adapter._typing import DF, Query, QueryOrDF
 27
 28    from sqlmesh.core.node import IntervalUnit
 29
 30
 31logger = logging.getLogger(__name__)
 32
 33
class ClickhouseEngineAdapter(EngineAdapterWithIndexSupport, LogicalMergeMixin):
    """Engine adapter for ClickHouse.

    ClickHouse uses a two-level ``[database].[table]`` naming scheme, so the
    methods of this class strip an injected "virtual catalog" before SQL is
    executed and append ``ON CLUSTER`` clauses when a cluster is configured.
    """

    # Class-level capability flags/settings; presumably consumed by the base adapter
    # classes (their definitions are outside this file) - TODO confirm exact semantics.
    DIALECT = "clickhouse"
    SUPPORTS_TRANSACTIONS = False
    SUPPORTS_VIEW_SCHEMA = False
    SUPPORTS_REPLACE_TABLE = False
    COMMENT_CREATION_VIEW = CommentCreationView.COMMENT_COMMAND_ONLY

    SCHEMA_DIFFER_KWARGS = {}

    # NOTE(review): these two are not referenced in the visible part of this class;
    # presumably used when building table properties - verify against the rest of the file.
    DEFAULT_TABLE_ENGINE = "MergeTree"
    ORDER_BY_TABLE_ENGINE_REGEX = "^.*?MergeTree.*$"
 45
 46    @property
 47    def catalog_support(self) -> CatalogSupport:
 48        # This property is intentionally dynamic: it transitions from UNSUPPORTED to
 49        # SINGLE_CATALOG_ONLY after inject_virtual_catalog() sets _default_catalog. Callers must
 50        # not cache the result — always read it live so they see the post-injection state.
 51        if self._default_catalog:
 52            return CatalogSupport.SINGLE_CATALOG_ONLY
 53        return CatalogSupport.UNSUPPORTED
 54
 55    def supports_virtual_catalog(self) -> bool:
 56        return True
 57
 58    def inject_virtual_catalog(self, gateway: str) -> None:
 59        configured = self._extra_config.get("virtual_catalog")
 60        self._default_catalog = f"__{gateway}__" if configured is None else configured
 61
 62    @property
 63    def engine_run_mode(self) -> EngineRunMode:
 64        if self._extra_config.get("cloud_mode"):
 65            return EngineRunMode.CLOUD
 66        # we use the user's specification of a cluster in the connection config to determine if
 67        #   the engine is in cluster mode
 68        if self._extra_config.get("cluster"):
 69            return EngineRunMode.CLUSTER
 70        return EngineRunMode.STANDALONE
 71
 72    @property
 73    def cluster(self) -> t.Optional[str]:
 74        return self._extra_config.get("cluster")
 75
 76    # Workaround for clickhouse-connect cursor bug
 77    # - cursor does not reset row index correctly on `close()`, so `fetchone()` and `fetchmany()`
 78    #     return the wrong (or no) rows after the very first cursor query that returns rows
 79    #     in the connection
 80    # - cursor does reset the data rows correctly on `close()`, so `fetchall()` works because it
 81    #     doesn't use the row index at all
 82    def fetchone(
 83        self,
 84        query: t.Union[exp.Expr, str],
 85        ignore_unsupported_errors: bool = False,
 86        quote_identifiers: bool = False,
 87    ) -> t.Tuple:
 88        with self.transaction():
 89            self.execute(
 90                query,
 91                ignore_unsupported_errors=ignore_unsupported_errors,
 92                quote_identifiers=quote_identifiers,
 93            )
 94            return self.cursor.fetchall()[0]
 95
 96    def _fetch_native_df(
 97        self, query: t.Union[exp.Expr, str], quote_identifiers: bool = False
 98    ) -> pd.DataFrame:
 99        """Fetches a Pandas DataFrame from the cursor"""
100        return self.cursor.client.query_df(
101            self._to_sql(query, quote=quote_identifiers) if isinstance(query, exp.Expr) else query,
102            use_extended_dtypes=True,
103        )
104
105    def _df_to_source_queries(
106        self,
107        df: DF,
108        target_columns_to_types: t.Dict[str, exp.DataType],
109        batch_size: int,
110        target_table: TableName,
111        source_columns: t.Optional[t.List[str]] = None,
112        **kwargs: t.Any,
113    ) -> t.List[SourceQuery]:
114        temp_table = self._get_temp_table(target_table, **kwargs)
115        source_columns_to_types = get_source_columns_to_types(
116            target_columns_to_types, source_columns
117        )
118
119        def query_factory() -> Query:
120            # It is possible for the factory to be called multiple times and if so then the temp table will already
121            # be created so we skip creating again. This means we are assuming the first call is the same result
122            # as later calls.
123            if not self.table_exists(temp_table):
124                self.create_table(
125                    temp_table,
126                    source_columns_to_types,
127                    storage_format=exp.var("MergeTree"),
128                    **kwargs,
129                )
130                ordered_df = df[list(source_columns_to_types)]
131
132                self.cursor.client.insert_df(temp_table.sql(dialect=self.dialect), df=ordered_df)
133
134            return exp.select(*self._casted_columns(target_columns_to_types, source_columns)).from_(
135                temp_table
136            )
137
138        return [
139            SourceQuery(
140                query_factory=query_factory,
141                cleanup_func=lambda: self.drop_table(temp_table, **kwargs),
142            )
143        ]
144
145    def _get_data_objects(
146        self, schema_name: SchemaName, object_names: t.Optional[t.Set[str]] = None
147    ) -> t.List[DataObject]:
148        """
149        Returns all the data objects that exist in the given database.
150        """
151        query = (
152            exp.select(
153                exp.column("database").as_("schema_name"),
154                exp.column("name"),
155                exp.case(exp.column("engine"))
156                .when(
157                    exp.Literal.string("View"),
158                    exp.Literal.string("view"),
159                )
160                .else_(
161                    exp.Literal.string("table"),
162                )
163                .as_("type"),
164            )
165            .from_("system.tables")
166            .where(exp.column("database").eq(to_schema(schema_name).db))
167        )
168        if object_names:
169            query = query.where(exp.column("name").isin(*object_names))
170        df = self.fetchdf(query)
171        return [
172            DataObject(
173                catalog=None,
174                schema=row.schema_name,
175                name=row.name,
176                type=DataObjectType.from_str(row.type),  # type: ignore
177            )
178            for row in df.itertuples()
179        ]
180
181    def create_schema(
182        self,
183        schema_name: SchemaName,
184        ignore_if_exists: bool = True,
185        warn_on_error: bool = True,
186        properties: t.List[exp.Expr] = [],
187    ) -> None:
188        """Create a Clickhouse database from a name or qualified table name.
189
190        Clickhouse has a two-level naming scheme [database].[table].
191        """
192        from sqlmesh.utils.errors import SQLMeshError
193
194        properties_copy = properties.copy()
195        if self.engine_run_mode.is_cluster:
196            properties_copy.append(exp.OnCluster(this=exp.to_identifier(self.cluster)))
197
198        # ClickHouse does not support catalogs. When a virtual catalog has been injected
199        # (self._default_catalog is set), strip it from the schema name. This mirrors the
200        # SINGLE_CATALOG_ONLY branch in the set_catalog decorator, which does not apply here
201        # because this override is not wrapped by @set_catalog().
202        if self._default_catalog:
203            schema_exp = to_schema(schema_name)
204            catalog_name = schema_exp.catalog
205            if catalog_name:
206                if catalog_name != self._default_catalog:
207                    raise SQLMeshError(
208                        f"clickhouse requires that all catalog operations be against a single catalog: "
209                        f"{self._default_catalog}. Provided catalog: {catalog_name}"
210                    )
211                schema_name = self._strip_virtual_catalog(schema_exp)
212
213        # can't call super() because it will try to set a catalog
214        return self._create_schema(
215            schema_name=schema_name,
216            ignore_if_exists=ignore_if_exists,
217            warn_on_error=warn_on_error,
218            properties=properties_copy,
219            # sqlglot transpiles CREATE SCHEMA to CREATE DATABASE, but this text is used in an error message
220            kind="DATABASE",
221        )
222
223    def _insert_overwrite_by_condition(
224        self,
225        table_name: TableName,
226        source_queries: t.List[SourceQuery],
227        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
228        where: t.Optional[exp.Condition] = None,
229        insert_overwrite_strategy_override: t.Optional[InsertOverwriteStrategy] = None,
230        **kwargs: t.Any,
231    ) -> None:
232        """
233        Implements the table or partition swap approach to insert-overwriting records.
234
235        Because this method executes multiple variants (full table replace, replace by time
236        range, replace by key, replace by partition), some upstream caller info is needed and
237        passed via kwargs.
238
239        Args:
240            table_name: Name of target table
241            source_queries: Source queries returning records to insert
242            target_columns_to_types: Column names and data types of target table
243            where: SQLGlot expression determining which target table rows should be overwritten
244            insert_overwrite_strategy_override: Not used by Clickhouse
245            kwargs:
246                dynamic_key: Key columns (replace by key only)
247                dynamic_key_exp: Expression to build key (replace by key only)
248                dynamic_key_unique: Whether more than one record can exist per key value (replace by key only)
249
250                keep_existing_partition_rows: Whether to overwrite partitions with only new records (incremental by partition only)
251
252        Returns:
253            Side effects only: execution of insert-overwrite operation.
254        """
255        target_table = exp.to_table(table_name)
256        target_columns_to_types = target_columns_to_types or self.columns(target_table)
257
258        temp_table = self._get_temp_table(target_table)
259        self.create_table_like(temp_table, target_table)
260
261        # REPLACE BY KEY: extract kwargs if present
262        dynamic_key = kwargs.get("dynamic_key")
263        if dynamic_key:
264            dynamic_key_exp = t.cast(exp.Expr, kwargs.get("dynamic_key_exp"))
265            dynamic_key_unique = t.cast(bool, kwargs.get("dynamic_key_unique"))
266
267        try:
268            # insert new records into temp table
269            for source_query in source_queries:
270                with source_query as query:
271                    # REPLACE BY KEY: if unique key, DISTINCTify by key columns so only one row is present per key
272                    if dynamic_key and dynamic_key_unique:
273                        query = query.distinct(*dynamic_key)  # type: ignore
274
275                    query = self._order_projections_and_filter(
276                        query, target_columns_to_types, where=where
277                    )
278                    self._insert_append_query(
279                        temp_table,
280                        query,
281                        target_columns_to_types=target_columns_to_types,
282                        order_projections=False,
283                    )
284
285            # REPLACE BY KEY: build `where` expression as "key IN (new rows' key values)"
286            if dynamic_key:
287                key_query = exp.select(dynamic_key_exp).from_(temp_table)
288                if not dynamic_key_unique:
289                    key_query = key_query.distinct()
290                where = dynamic_key_exp.isin(query=key_query)
291
292            # get target table partition key to confirm it's actually partitioned
293            table_partition_exp = self.fetchone(
294                exp.select("partition_key")
295                .from_("system.tables")
296                .where(
297                    exp.column("database").eq(target_table.db),
298                    exp.column("name").eq(target_table.name),
299                )
300            )
301
302            all_affected_partitions: t.Set[str] = set()
303
304            if where:
305                # identify existing records to keep by inverting the delete `where` clause
306                existing_records_insert_exp = exp.insert(
307                    self._select_columns(target_columns_to_types)
308                    .from_(target_table)
309                    .where(exp.paren(expression=where).not_()),
310                    temp_table,
311                )
312
313                # if target table is partitioned, modify insert expression to only insert
314                #   existing records that are in one of the affected partitions
315                if table_partition_exp:
316                    partitions_temp_table_name = self._get_temp_table(
317                        exp.to_table(f"{target_table.db}._affected_partitions")
318                    )
319                    all_affected_partitions, existing_records_insert_exp = (
320                        self._get_affected_partitions_and_insert_exp(
321                            target_table,
322                            temp_table,
323                            where,
324                            existing_records_insert_exp,
325                            partitions_temp_table_name,
326                        )
327                    )
328
329                try:
330                    self.execute(existing_records_insert_exp, track_rows_processed=True)
331                finally:
332                    if table_partition_exp:
333                        self.drop_table(partitions_temp_table_name)
334
335            # process by partition if:
336            #   1. The table is partitioned AND
337            #   (2a. There are existing records to keep (`where`) OR
338            #    2b. We're overwriting existing partition rows (incremental by partition model))
339            if table_partition_exp and (
340                where or kwargs.get("keep_existing_partition_rows") is False
341            ):
342                # only replace partitions that have records in temp_table
343                partitions_to_replace = self._get_partition_ids(temp_table)
344
345                # drop affected partitions that have no records in temp_table
346                #   - NOTE: `all_affected_partitions` will be empty when keep_existing_partition_rows=False
347                #      because previous code block is skipped
348                partitions_to_drop = all_affected_partitions - partitions_to_replace
349
350                if partitions_to_replace or partitions_to_drop:
351                    self.alter_table(
352                        [
353                            self._build_alter_partition_exp(
354                                target_table, temp_table, partitions_to_replace, partitions_to_drop
355                            )
356                        ]
357                    )
358            else:
359                self._exchange_tables(target_table, temp_table)
360        finally:
361            self.drop_table(temp_table)
362
363    def _get_affected_partitions_and_insert_exp(
364        self,
365        target_table: exp.Table,
366        temp_table: exp.Table,
367        where: exp.Condition,
368        existing_records_insert_exp: exp.Insert,
369        partitions_temp_table_name: exp.Table,
370    ) -> tuple[t.Set[str], exp.Insert]:
371        # identify all affected partition IDs
372        #   - store in temp table so we can reuse results
373        self.ctas(
374            partitions_temp_table_name,
375            exp.select("partition_id")
376            .distinct()
377            .from_(
378                exp.union(
379                    # target table partitions with records in `where`
380                    exp.select(exp.column("_partition_id").as_("partition_id"))
381                    .from_(target_table)
382                    .where(where),
383                    # temp table partitions with new records to insert
384                    exp.select(
385                        exp.column("_partition_id").as_("partition_id"),
386                    ).from_(temp_table),
387                ).subquery("_affected_partitions")
388            ),
389        )
390
391        # read all affected partition IDs into memory
392        all_affected_partitions = self._get_partition_ids(
393            partitions_temp_table_name, "partition_id"
394        )
395
396        # limit existing records insert expression WHERE to affected target table partitions
397        #   by adding `AND _partition_id IN (SELECT partition_id FROM partitions_temp_table)`
398        existing_records_insert_exp.set(
399            "expression",
400            existing_records_insert_exp.expression.where(
401                exp.column("_partition_id").isin(
402                    exp.select("partition_id").from_(partitions_temp_table_name)
403                )
404            ),
405        )
406
407        return all_affected_partitions, existing_records_insert_exp
408
409    def _build_alter_partition_exp(
410        self,
411        target_table: exp.Table,
412        temp_table: exp.Table,
413        partitions_to_replace: t.Set[str],
414        partitions_to_drop: t.Set[str],
415    ) -> exp.Alter:
416        alter_expr = exp.Alter(this=target_table, kind="TABLE")
417
418        for partition in partitions_to_replace:
419            alter_expr.append(
420                "actions",
421                exp.ReplacePartition(
422                    expression=exp.Partition(
423                        expressions=[exp.PartitionId(this=exp.Literal.string(str(partition)))]
424                    ),
425                    source=temp_table,
426                ),
427            )
428
429        for partition in partitions_to_drop:
430            alter_expr.append(
431                "actions",
432                exp.DropPartition(
433                    expressions=[
434                        exp.Partition(
435                            expressions=[exp.PartitionId(this=exp.Literal.string(str(partition)))]
436                        )
437                    ],
438                    source=temp_table,
439                ),
440            )
441
442        return alter_expr
443
444    def _replace_by_key(
445        self,
446        target_table: TableName,
447        source_table: QueryOrDF,
448        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]],
449        key: t.Sequence[exp.Expr],
450        is_unique_key: bool,
451        source_columns: t.Optional[t.List[str]] = None,
452    ) -> None:
453        source_queries, target_columns_to_types = self._get_source_queries_and_columns_to_types(
454            source_table,
455            target_columns_to_types,
456            target_table=target_table,
457            source_columns=source_columns,
458        )
459
460        key_exp = (
461            exp.func("CONCAT_WS", "'__SQLMESH_DELIM__'", *key, dialect=self.dialect)
462            if len(key) > 1
463            else key[0]
464        )
465
466        self._insert_overwrite_by_condition(
467            target_table,
468            source_queries,
469            target_columns_to_types,
470            dynamic_key=key,
471            dynamic_key_exp=key_exp,
472            dynamic_key_unique=is_unique_key,
473        )
474
475    def insert_overwrite_by_partition(
476        self,
477        table_name: TableName,
478        query_or_df: QueryOrDF,
479        partitioned_by: t.List[exp.Expr],
480        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
481        source_columns: t.Optional[t.List[str]] = None,
482    ) -> None:
483        table_name = self._strip_virtual_catalog(table_name)
484        source_queries, target_columns_to_types = self._get_source_queries_and_columns_to_types(
485            query_or_df,
486            target_columns_to_types,
487            target_table=table_name,
488            source_columns=source_columns,
489        )
490
491        self._insert_overwrite_by_condition(
492            table_name, source_queries, target_columns_to_types, keep_existing_partition_rows=False
493        )
494
495    def _create_table_like(
496        self,
497        target_table_name: TableName,
498        source_table_name: TableName,
499        exists: bool,
500        **kwargs: t.Any,
501    ) -> None:
502        """Create table with identical structure as source table"""
503        self.execute(
504            f"CREATE TABLE {target_table_name}{self._on_cluster_sql()} AS {source_table_name}"
505        )
506
507    def _get_partition_ids(
508        self,
509        table: exp.Table,
510        partition_col_name: str = "_partition_id",
511        where: t.Optional[exp.Condition] = None,
512        limit: t.Optional[int] = None,
513    ) -> t.Set[t.Any]:
514        """List partition IDs present in table"""
515        partitions_query = exp.select(partition_col_name).distinct().from_(table)
516        if where:
517            partitions_query = partitions_query.where(where)
518        if limit:
519            partitions_query = partitions_query.limit(limit)
520        partitions = self.fetchall(partitions_query)
521
522        return set([part[0] for part in partitions] if partitions else [])
523
524    def _create_table(
525        self,
526        table_name_or_schema: t.Union[exp.Schema, TableName],
527        expression: t.Optional[exp.Expr],
528        exists: bool = True,
529        replace: bool = False,
530        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
531        table_description: t.Optional[str] = None,
532        column_descriptions: t.Optional[t.Dict[str, str]] = None,
533        table_kind: t.Optional[str] = None,
534        track_rows_processed: bool = True,
535        **kwargs: t.Any,
536    ) -> None:
537        """Creates a table in the database.
538
539        Clickhouse Cloud requires doing CTAS in two steps.
540
541        First, we add the `EMPTY` property to the CTAS call to create a table with the proper
542        schema, then insert the data with the CTAS query.
543        """
544        # ensure columns used for partitioning are non-Nullable
545        #   - normally user's responsibility, but we automatically partition by time column in
546        #       incremental by time models
547        if kwargs.get("partitioned_by"):
548            partition_cols = [
549                col.name
550                for part_expr in kwargs["partitioned_by"]
551                for col in part_expr.find_all(exp.Column)
552            ]
553            if isinstance(table_name_or_schema, exp.Schema):
554                for coldef in table_name_or_schema.expressions:
555                    if coldef.name in partition_cols:
556                        coldef.kind.set("nullable", False)
557            if target_columns_to_types:
558                for col in partition_cols:
559                    target_columns_to_types[col].set("nullable", False)
560
561        super()._create_table(
562            table_name_or_schema,
563            expression,
564            exists,
565            replace,
566            target_columns_to_types,
567            table_description,
568            column_descriptions,
569            table_kind,
570            empty_ctas=(self.engine_run_mode.is_cloud and expression is not None),
571            track_rows_processed=track_rows_processed,
572            **kwargs,
573        )
574
575        # execute the second INSERT step if on cloud and creating a table
576        # - Additional clause is to avoid clickhouse-connect HTTP client bug where CTAS LIMIT 0
577        #     returns a success code but malformed response
578        if (
579            self.engine_run_mode.is_cloud
580            and table_kind != "VIEW"
581            and expression
582            and not (
583                expression.args.get("limit") is not None
584                and expression.args["limit"].expression.this == "0"
585            )
586        ):
587            table_name = (
588                table_name_or_schema.this
589                if isinstance(table_name_or_schema, exp.Schema)
590                else table_name_or_schema
591            )
592            self._insert_append_query(
593                table_name,
594                expression,  # type: ignore
595                target_columns_to_types or self.columns(table_name),
596            )
597
598    def _strip_virtual_catalog(self, name: "TableName") -> exp.Table:
599        """Strip the virtual catalog prefix from a table name if present.
600
601        When a virtual catalog has been injected, ClickHouse table names carry a
602        synthetic catalog prefix (e.g. ``__gw__``) so they match the 3-level FQN
603        depth of catalog-aware peers.  This helper removes that prefix before any
604        SQL is sent to the wire, since ClickHouse only supports a two-level
605        ``[database].[table]`` naming scheme.
606        """
607        table = exp.to_table(name)
608        if self._default_catalog and table.catalog == self._default_catalog:
609            table.set("catalog", None)
610        return table
611
612    def _exchange_tables(
613        self,
614        old_table_name: TableName,
615        new_table_name: TableName,
616    ) -> None:
617        from clickhouse_connect.driver.exceptions import DatabaseError  # type: ignore
618
619        old_table_sql = exp.to_table(old_table_name).sql(dialect=self.dialect, identify=True)
620        new_table_sql = exp.to_table(new_table_name).sql(dialect=self.dialect, identify=True)
621
622        try:
623            self.execute(
624                f"EXCHANGE TABLES {old_table_sql} AND {new_table_sql}{self._on_cluster_sql()}"
625            )
626        except DatabaseError as e:
627            if "NOT_IMPLEMENTED" in str(e):
628                # If someone is using an old Clickhouse version, an OS that doesn't support atomic exchanges,
629                # or a database engine that doesn't support atomic exchanges, we do a non-atomic rename instead.
630                #
631                # Executing multiple renames in one call like `RENAME TABLE a to b, c to a` is supported
632                # but not an atomic operation. Because it is not atomic, doing it in two calls is equivalent
633                # and does not require defining an additional method.
634                throwaway_table_name = self._get_temp_table(old_table_name)
635                self._rename_table(old_table_name, throwaway_table_name)
636                self._rename_table(new_table_name, old_table_name)
637                self.drop_table(throwaway_table_name)
638
639    def _rename_table(
640        self,
641        old_table_name: TableName,
642        new_table_name: TableName,
643    ) -> None:
644        old_table_sql = exp.to_table(old_table_name).sql(dialect=self.dialect, identify=True)
645        new_table_sql = exp.to_table(new_table_name).sql(dialect=self.dialect, identify=True)
646
647        self.execute(f"RENAME TABLE {old_table_sql} TO {new_table_sql}{self._on_cluster_sql()}")
648
649    def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expr]) -> None:
650        delete_expr = exp.delete(self._strip_virtual_catalog(table_name), where)
651        if self.engine_run_mode.is_cluster:
652            delete_expr.set("cluster", exp.OnCluster(this=exp.to_identifier(self.cluster)))
653        self.execute(delete_expr)
654
655    def alter_table(
656        self,
657        alter_expressions: t.Union[t.List[exp.Alter], t.List[TableAlterOperation]],
658    ) -> None:
659        """
660        Performs the alter statements to change the current table into the structure of the target table.
661        """
662        with self.transaction():
663            for alter_expression in [
664                x.expression if isinstance(x, TableAlterOperation) else x for x in alter_expressions
665            ]:
666                if self._default_catalog and isinstance(alter_expression.this, exp.Table):
667                    if alter_expression.this.catalog == self._default_catalog:
668                        alter_expression.this.set("catalog", None)
669                if self.engine_run_mode.is_cluster:
670                    alter_expression.set(
671                        "cluster", exp.OnCluster(this=exp.to_identifier(self.cluster))
672                    )
673                self.execute(alter_expression)
674
675    def _drop_object(
676        self,
677        name: TableName | SchemaName,
678        exists: bool = True,
679        kind: str = "TABLE",
680        cascade: bool = False,
681        **drop_args: t.Any,
682    ) -> None:
683        """Drops an object.
684
685        An object could be a DATABASE, SCHEMA, VIEW, TABLE, DYNAMIC TABLE, TEMPORARY TABLE etc depending on the :kind.
686
687        Args:
688            name: The name of the table to drop.
689            exists: If exists, defaults to True.
690            kind: What kind of object to drop. Defaults to TABLE
691            **drop_args: Any extra arguments to set on the Drop expression
692        """
693        super()._drop_object(
694            name=name,
695            exists=exists,
696            kind=kind,
697            cascade=cascade,
698            cluster=exp.OnCluster(this=exp.to_identifier(self.cluster))
699            if self.engine_run_mode.is_cluster
700            else None,
701            **drop_args,
702        )
703
704    def _build_partitioned_by_exp(
705        self,
706        partitioned_by: t.List[exp.Expr],
707        **kwargs: t.Any,
708    ) -> t.Optional[t.Union[exp.PartitionedByProperty, exp.Property]]:
709        return exp.PartitionedByProperty(
710            this=exp.Schema(expressions=partitioned_by),
711        )
712
713    def ensure_nulls_for_unmatched_after_join(
714        self,
715        query: Query,
716    ) -> Query:
717        # Set `join_use_nulls = 1` in a query's SETTINGS clause
718        query.append("settings", exp.var("join_use_nulls").eq(exp.Literal.number("1")))
719        return query
720
721    def use_server_nulls_for_unmatched_after_join(
722        self,
723        query: Query,
724    ) -> Query:
725        # Set the `join_use_nulls` server value in a query's SETTINGS clause
726        #
727        # Use in SCD models:
728        #  - The SCD query we build must include the setting `join_use_nulls = 1` to ensure that empty cells in a join
729        #      are filled with NULL instead of the default data type value. The default join_use_nulls value is `0`.
730        #  - The SCD embeds the user's original query in the `source` CTE
731        #  - Settings are dynamically scoped, so our setting may override the server's default setting the user expects
732        #      for their query.
733        #  - To prevent this, we:
734        #     - If the user query sets `join_use_nulls`, we do nothing
735        #     - If the user query does not set `join_use_nulls`, we query the server for the current setting
736        #       - If the server value is 1, we do nothing
737        #       - If the server values is not 1, we inject its `join_use_nulls` value into the user query
738        #     - We do not need to check user subqueries because our injected setting operates at the same scope the
739        #         server value would normally operate at
740        setting_name = "join_use_nulls"
741        setting_value = "1"
742
743        user_settings = query.args.get("settings")
744        # if user has not already set it explicitly
745        if not (
746            user_settings
747            and any(
748                [
749                    isinstance(setting, exp.EQ) and setting.name == setting_name
750                    for setting in user_settings
751                ]
752            )
753        ):
754            server_value = self.fetchone(
755                exp.select("value")
756                .from_("system.settings")
757                .where(exp.column("name").eq(exp.Literal.string(setting_name)))
758            )[0]
759            # only inject the setting if the server value isn't 1
760            inject_setting = setting_value != server_value
761            setting_value = server_value if inject_setting else setting_value
762
763            if inject_setting:
764                query.append(
765                    "settings", exp.var(setting_name).eq(exp.Literal.number(setting_value))
766                )
767
768        return query
769
770    def _build_settings_property(
771        self, settings: t.Mapping[str, exp.Expr | str | int | float]
772    ) -> exp.SettingsProperty:
773        # ClickHouse requires every key=value pair to live under a single
774        # SETTINGS clause (`SETTINGS a = 1, b = 2`). Emitting one
775        # SettingsProperty per pair produces repeated SETTINGS keywords and a
776        # syntax error at execution time.
777        return exp.SettingsProperty(
778            expressions=[
779                exp.EQ(
780                    this=exp.var(key.lower()),
781                    expression=value
782                    if isinstance(value, exp.Expr)
783                    else exp.Literal(this=value, is_string=isinstance(value, str)),
784                )
785                for key, value in settings.items()
786            ]
787        )
788
789    def _build_table_properties_exp(
790        self,
791        catalog_name: t.Optional[str] = None,
792        table_format: t.Optional[str] = None,
793        storage_format: t.Optional[str] = None,
794        partitioned_by: t.Optional[t.List[exp.Expr]] = None,
795        partition_interval_unit: t.Optional[IntervalUnit] = None,
796        clustered_by: t.Optional[t.List[exp.Expr]] = None,
797        table_properties: t.Optional[t.Dict[str, exp.Expr]] = None,
798        target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
799        table_description: t.Optional[str] = None,
800        table_kind: t.Optional[str] = None,
801        empty_ctas: bool = False,
802        **kwargs: t.Any,
803    ) -> t.Optional[exp.Properties]:
804        properties: t.List[exp.Expr] = []
805
806        table_engine = self.DEFAULT_TABLE_ENGINE
807        if storage_format:
808            table_engine = (
809                storage_format.this if isinstance(storage_format, exp.Var) else storage_format  # type: ignore
810            )
811        properties.append(exp.EngineProperty(this=table_engine))
812
813        # copy of table_properties so we can pop items off below then consume the rest later
814        table_properties_copy = {
815            k.upper(): v for k, v in (table_properties.copy() if table_properties else {}).items()
816        }
817
818        mergetree_engine = bool(re.search(self.ORDER_BY_TABLE_ENGINE_REGEX, table_engine))
819        ordered_by_raw = table_properties_copy.pop("ORDER_BY", None)
820        if mergetree_engine:
821            ordered_by_exprs = []
822            if ordered_by_raw:
823                ordered_by_vals = []
824
825                if isinstance(ordered_by_raw, (exp.Tuple, exp.Array)):
826                    ordered_by_vals = ordered_by_raw.expressions
827                if isinstance(ordered_by_raw, exp.Paren):
828                    ordered_by_vals = [ordered_by_raw.this]
829
830                if not ordered_by_vals:
831                    ordered_by_vals = (
832                        ordered_by_raw if isinstance(ordered_by_raw, list) else [ordered_by_raw]
833                    )
834
835                for col in ordered_by_vals:
836                    ordered_by_exprs.append(
837                        col
838                        if isinstance(col, exp.Column)
839                        else maybe_parse(
840                            col.name if isinstance(col, exp.Literal) else col,
841                            dialect=self.dialect,
842                            into=exp.Ordered,
843                        )
844                    )
845
846            properties.append(exp.Order(expressions=[exp.Tuple(expressions=ordered_by_exprs)]))
847
848        primary_key = table_properties_copy.pop("PRIMARY_KEY", None)
849        if mergetree_engine and primary_key:
850            primary_key_vals = []
851            if isinstance(primary_key, (exp.Tuple, exp.Array)):
852                primary_key_vals = primary_key.expressions
853            if isinstance(ordered_by_raw, exp.Paren):
854                primary_key_vals = [primary_key.this]
855
856            if not primary_key_vals:
857                primary_key_vals = primary_key if isinstance(primary_key, list) else [primary_key]
858
859            properties.append(
860                exp.PrimaryKey(
861                    expressions=[
862                        exp.to_column(k.name if isinstance(k, exp.Literal) else k)
863                        for k in primary_key_vals
864                    ]
865                )
866            )
867
868        ttl = table_properties_copy.pop("TTL", None)
869        if ttl:
870            properties.append(
871                exp.MergeTreeTTL(expressions=[ttl if isinstance(ttl, exp.Expr) else exp.var(ttl)])
872            )
873
874        if (
875            partitioned_by
876            and (partitioned_by_prop := self._build_partitioned_by_exp(partitioned_by)) is not None
877        ):
878            properties.append(partitioned_by_prop)
879
880        if self.engine_run_mode.is_cluster:
881            properties.append(exp.OnCluster(this=exp.to_identifier(self.cluster)))
882
883        if empty_ctas:
884            properties.append(exp.EmptyProperty())
885
886        if table_properties_copy:
887            properties.append(self._build_settings_property(table_properties_copy))
888
889        if table_description:
890            properties.append(
891                exp.SchemaCommentProperty(
892                    this=exp.Literal.string(self._truncate_table_comment(table_description))
893                )
894            )
895
896        if properties:
897            return exp.Properties(expressions=properties)
898
899        return None
900
901    def _build_view_properties_exp(
902        self,
903        view_properties: t.Optional[t.Dict[str, exp.Expr]] = None,
904        table_description: t.Optional[str] = None,
905        **kwargs: t.Any,
906    ) -> t.Optional[exp.Properties]:
907        """Creates a SQLGlot table properties expression for view"""
908        properties: t.List[exp.Expr] = []
909
910        view_properties_copy = view_properties.copy() if view_properties else {}
911
912        if self.engine_run_mode.is_cluster:
913            properties.append(exp.OnCluster(this=exp.to_identifier(self.cluster)))
914
915        if view_properties_copy:
916            properties.append(self._build_settings_property(view_properties_copy))
917
918        if table_description:
919            properties.append(
920                exp.SchemaCommentProperty(
921                    this=exp.Literal.string(self._truncate_table_comment(table_description))
922                )
923            )
924
925        if properties:
926            return exp.Properties(expressions=properties)
927        return None
928
929    def _build_create_comment_table_exp(
930        self, table: exp.Table, table_comment: str, table_kind: str, **kwargs: t.Any
931    ) -> exp.Comment | str:
932        table_sql = table.sql(dialect=self.dialect, identify=True)
933
934        truncated_comment = self._truncate_table_comment(table_comment)
935        comment_sql = exp.Literal.string(truncated_comment).sql(dialect=self.dialect)
936
937        return f"ALTER TABLE {table_sql}{self._on_cluster_sql()} MODIFY COMMENT {comment_sql}"
938
939    def _build_create_comment_column_exp(
940        self,
941        table: exp.Table,
942        column_name: str,
943        column_comment: str,
944        table_kind: str = "TABLE",
945        **kwargs: t.Any,
946    ) -> exp.Comment | str:
947        table_sql = table.sql(dialect=self.dialect, identify=True)
948        column_sql = exp.to_column(column_name).sql(dialect=self.dialect, identify=True)
949
950        truncated_comment = self._truncate_table_comment(column_comment)
951        comment_sql = exp.Literal.string(truncated_comment).sql(dialect=self.dialect)
952
953        return f"ALTER TABLE {table_sql}{self._on_cluster_sql()} COMMENT COLUMN {column_sql} {comment_sql}"
954
955    def _on_cluster_sql(self) -> str:
956        if self.engine_run_mode.is_cluster:
957            cluster_name = exp.to_identifier(self.cluster, quoted=True).sql(dialect=self.dialect)  #  type: ignore
958            return f" ON CLUSTER {cluster_name} "
959        return ""
logger = <Logger sqlmesh.core.engine_adapter.clickhouse (WARNING)>
sqlmesh.core.engine_adapter.clickhouse

Arguments:

Inherited Members