Edit on GitHub

sqlmesh.utils.pandas

 1from __future__ import annotations
 2
 3import sys
 4import typing as t
 5
 6import numpy as np
 7import pandas as pd
 8from sqlglot import exp
 9
10if t.TYPE_CHECKING:
11    # https://github.com/python/mypy/issues/1153
12    if sys.version_info >= (3, 9):
13        try:
14            from pandas.core.frame import _PandasNamedTuple as PandasNamedTuple
15        except ImportError:
16            PandasNamedTuple = t.Tuple[t.Any, ...]  # type: ignore
17    else:
18        PandasNamedTuple = t.Tuple[t.Any, ...]
19
20
# Lookup table from a pandas/NumPy column dtype to the sqlglot data type used
# to represent it. Every entry gets its own freshly built ``exp.DataType``
# instance, and the insertion order below is preserved in the resulting dict.
PANDAS_TYPE_MAPPINGS = {
    dtype: exp.DataType.build(sql_type_name)
    for dtype, sql_type_name in (
        # NumPy dtypes.
        (np.dtype("int8"), "tinyint"),
        (np.dtype("int16"), "smallint"),
        (np.dtype("int32"), "int"),
        (np.dtype("int64"), "bigint"),
        (np.dtype("float16"), "float"),
        (np.dtype("float32"), "float"),
        (np.dtype("float64"), "double"),
        (np.dtype("O"), "text"),
        (np.dtype("bool"), "boolean"),
        (np.dtype("datetime64"), "timestamp"),
        (np.dtype("datetime64[ns]"), "timestamp"),
        (np.dtype("datetime64[us]"), "timestamp"),
        # pandas extension dtypes.
        (pd.Int8Dtype(), "tinyint"),
        (pd.Int16Dtype(), "smallint"),
        (pd.Int32Dtype(), "int"),
        (pd.Int64Dtype(), "bigint"),
        (pd.Float32Dtype(), "float"),
        (pd.Float64Dtype(), "double"),
        (pd.StringDtype(), "text"),
        (pd.BooleanDtype(), "boolean"),
    )
}
43
44
def columns_to_types_from_df(df: pd.DataFrame) -> t.Dict[str, exp.DataType]:
    """Translate the column dtypes of ``df`` into SQL data types.

    Each column's dtype is looked up in ``PANDAS_TYPE_MAPPINGS``.

    Args:
        df: The DataFrame whose columns are inspected.

    Returns:
        A dictionary, in column order, keyed by the ``str()`` of each column
        label and holding the mapped ``exp.DataType`` for that column.

    Raises:
        ValueError: If the lookup for a column's dtype yields no usable
            (truthy) entry from ``PANDAS_TYPE_MAPPINGS``.
    """
    sql_types_by_column: t.Dict[str, exp.DataType] = {}
    for label, dtype in df.dtypes.items():
        sql_type = PANDAS_TYPE_MAPPINGS.get(dtype)
        if not sql_type:
            raise ValueError(f"Unsupported pandas type '{dtype}'")
        # Column labels are not necessarily strings; normalise them for the keys.
        sql_types_by_column[str(label)] = sql_type
    return sql_types_by_column
def columns_to_types_from_df(df: pandas.core.frame.DataFrame) -> Dict[str, sqlglot.expressions.DataType]:
def columns_to_types_from_df(df: pd.DataFrame) -> t.Dict[str, exp.DataType]:
    """Translate the column dtypes of ``df`` into SQL data types.

    Each column's dtype is looked up in ``PANDAS_TYPE_MAPPINGS``.

    Args:
        df: The DataFrame whose columns are inspected.

    Returns:
        A dictionary, in column order, keyed by the ``str()`` of each column
        label and holding the mapped ``exp.DataType`` for that column.

    Raises:
        ValueError: If the lookup for a column's dtype yields no usable
            (truthy) entry from ``PANDAS_TYPE_MAPPINGS``.
    """
    sql_types_by_column: t.Dict[str, exp.DataType] = {}
    for label, dtype in df.dtypes.items():
        sql_type = PANDAS_TYPE_MAPPINGS.get(dtype)
        if not sql_type:
            raise ValueError(f"Unsupported pandas type '{dtype}'")
        # Column labels are not necessarily strings; normalise them for the keys.
        sql_types_by_column[str(label)] = sql_type
    return sql_types_by_column