sqlmesh.utils.pandas
1from __future__ import annotations 2 3import sys 4import typing as t 5 6import numpy as np 7import pandas as pd 8from sqlglot import exp 9 10if t.TYPE_CHECKING: 11 # https://github.com/python/mypy/issues/1153 12 if sys.version_info >= (3, 9): 13 try: 14 from pandas.core.frame import _PandasNamedTuple as PandasNamedTuple 15 except ImportError: 16 PandasNamedTuple = t.Tuple[t.Any, ...] # type: ignore 17 else: 18 PandasNamedTuple = t.Tuple[t.Any, ...] 19 20 21PANDAS_TYPE_MAPPINGS = { 22 np.dtype("int8"): exp.DataType.build("tinyint"), 23 np.dtype("int16"): exp.DataType.build("smallint"), 24 np.dtype("int32"): exp.DataType.build("int"), 25 np.dtype("int64"): exp.DataType.build("bigint"), 26 np.dtype("float16"): exp.DataType.build("float"), 27 np.dtype("float32"): exp.DataType.build("float"), 28 np.dtype("float64"): exp.DataType.build("double"), 29 np.dtype("O"): exp.DataType.build("text"), 30 np.dtype("bool"): exp.DataType.build("boolean"), 31 np.dtype("datetime64"): exp.DataType.build("timestamp"), 32 np.dtype("datetime64[ns]"): exp.DataType.build("timestamp"), 33 np.dtype("datetime64[us]"): exp.DataType.build("timestamp"), 34 pd.Int8Dtype(): exp.DataType.build("tinyint"), 35 pd.Int16Dtype(): exp.DataType.build("smallint"), 36 pd.Int32Dtype(): exp.DataType.build("int"), 37 pd.Int64Dtype(): exp.DataType.build("bigint"), 38 pd.Float32Dtype(): exp.DataType.build("float"), 39 pd.Float64Dtype(): exp.DataType.build("double"), 40 pd.StringDtype(): exp.DataType.build("text"), # type: ignore 41 pd.BooleanDtype(): exp.DataType.build("boolean"), 42} 43 44 45def columns_to_types_from_df(df: pd.DataFrame) -> t.Dict[str, exp.DataType]: 46 result = {} 47 for column_name, column_type in df.dtypes.items(): 48 exp_type = PANDAS_TYPE_MAPPINGS.get(column_type) 49 if not exp_type: 50 raise ValueError(f"Unsupported pandas type '{column_type}'") 51 result[str(column_name)] = exp_type 52 return result
def columns_to_types_from_df(df: pandas.core.frame.DataFrame) -> Dict[str, sqlglot.expressions.DataType]:
def columns_to_types_from_df(df: pd.DataFrame) -> t.Dict[str, exp.DataType]:
    """Build a column-name -> SQL data type mapping for a DataFrame.

    Each dtype in ``df.dtypes`` is looked up in ``PANDAS_TYPE_MAPPINGS``.

    Args:
        df: The DataFrame whose column dtypes should be translated.

    Returns:
        A dict keyed by ``str(column label)`` whose values are the
        ``exp.DataType`` objects stored in ``PANDAS_TYPE_MAPPINGS``.

    Raises:
        ValueError: If the lookup for a column's dtype yields nothing.
    """
    columns_to_types: t.Dict[str, exp.DataType] = {}
    for column_name, column_type in df.dtypes.items():
        sql_type = PANDAS_TYPE_MAPPINGS.get(column_type)
        if sql_type:
            # Column labels may be non-string (e.g. ints); keys are always str.
            columns_to_types[str(column_name)] = sql_type
        else:
            raise ValueError(f"Unsupported pandas type '{column_type}'")
    return columns_to_types