Edit on GitHub

sqlmesh.utils.date

  1from __future__ import annotations
  2
  3import re
  4import time
  5import typing as t
  6import warnings
  7
  8from datetime import date, datetime, timedelta, timezone, tzinfo
  9
 10import dateparser
 11from dateparser import freshness_date_parser as freshness_date_parser_module
 12from dateparser.freshness_date_parser import freshness_date_parser
 13from sqlglot import exp
 14
 15from sqlmesh.utils import ttl_cache
 16
 17if t.TYPE_CHECKING:
 18    import pandas as pd
 19
 20    from sqlglot.dialects.dialect import DialectType
 21
 22UTC = timezone.utc
 23TimeLike = t.Union[date, datetime, str, int, float]
 24DatetimeRange = t.Tuple[datetime, datetime]
 25DatetimeRanges = t.List[DatetimeRange]
 26DATE_INT_FMT = "%Y%m%d"
 27
 28
 29warnings.filterwarnings(
 30    "ignore",
 31    message="The localize method is no longer necessary, as this time zone supports the fold attribute",
 32)
 33
 34
 35# The Freshness Date Data Parser doesn't support plural units so we add the `s?` to the expression
 36freshness_date_parser_module.PATTERN = re.compile(  # type: ignore
 37    r"(\d+[.,]?\d*)\s*(%s)s?\b" % freshness_date_parser_module._UNITS,  # type: ignore
 38    re.I | re.S | re.U,  # type: ignore
 39)
 40DAY_SHORTCUT_EXPRESSIONS = {"today", "yesterday", "tomorrow"}
 41TIME_UNITS = {"hours", "minutes", "seconds"}
 42TEMPORAL_TZ_TYPES = {
 43    exp.DataType.Type.TIMETZ,
 44    exp.DataType.Type.TIMESTAMPTZ,
 45    exp.DataType.Type.TIMESTAMPLTZ,
 46}
 47
 48
 49def now(minute_floor: bool = True) -> datetime:
 50    """
 51    Current utc datetime with optional minute level accuracy / granularity.
 52
 53    minute_floor is set to True by default
 54
 55    Args:
 56        minute_floor: If true (default), removes the second and microseconds from the current datetime.
 57
 58    Returns:
 59        A datetime object with tz utc.
 60    """
 61    now = datetime.now(tz=UTC)
 62    if minute_floor:
 63        return now.replace(second=0, microsecond=0, tzinfo=UTC)
 64    return now.replace(tzinfo=UTC)
 65
 66
 67def now_timestamp(minute_floor: bool = False) -> int:
 68    """
 69    Current utc timestamp.
 70
 71    Args:
 72        minute_floor: If true, removes the second and microseconds from the current datetime.
 73
 74    Returns:
 75        UTC epoch millis timestamp
 76    """
 77    return to_timestamp(now(minute_floor))
 78
 79
 80def now_ds() -> str:
 81    """
 82    Current utc ds.
 83
 84    Returns:
 85        Today's ds string.
 86    """
 87    return to_ds(now())
 88
 89
 90def yesterday(relative_base: t.Optional[datetime] = None) -> datetime:
 91    """
 92    Yesterday utc datetime.
 93
 94    Returns:
 95        A datetime object with tz utc representing yesterday's date
 96    """
 97    return to_datetime("yesterday", relative_base=relative_base)
 98
 99
100def yesterday_ds(relative_base: t.Optional[datetime] = None) -> str:
101    """
102    Yesterday utc ds.
103
104    Returns:
105        Yesterday's ds string.
106    """
107    return to_ds("yesterday", relative_base=relative_base)
108
109
110def yesterday_timestamp(relative_base: t.Optional[datetime] = None) -> int:
111    """
112    Yesterday utc timestamp.
113
114    Returns:
115        UTC epoch millis timestamp of yesterday
116    """
117    return to_timestamp(yesterday(relative_base=relative_base))
118
119
120def to_timestamp(
121    value: TimeLike,
122    relative_base: t.Optional[datetime] = None,
123    check_categorical_relative_expression: bool = True,
124) -> int:
125    """
126    Converts a value into an epoch millis timestamp.
127
128    Args:
129        value: A variety of date formats. If value is a string, it must be in iso format.
130        relative_base: The datetime to reference for time expressions that are using relative terms
131        check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
132
133    Returns:
134        Epoch millis timestamp.
135    """
136    return int(
137        to_datetime(
138            value,
139            relative_base=relative_base,
140            check_categorical_relative_expression=check_categorical_relative_expression,
141        ).timestamp()
142        * 1000
143    )
144
145
146@ttl_cache()
147def to_datetime(
148    value: TimeLike,
149    relative_base: t.Optional[datetime] = None,
150    check_categorical_relative_expression: bool = True,
151    tz: t.Optional[tzinfo] = None,
152) -> datetime:
153    """Converts a value into a UTC datetime object.
154
155    Args:
156        value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
157        relative_base: The datetime to reference for time expressions that are using relative terms.
158        check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
159        tz: Timezone to convert datetime to, defaults to utc
160
161    Raises:
162        ValueError if value cannot be converted to a datetime.
163
164    Returns:
165        A datetime object with tz (default UTC).
166    """
167    if isinstance(value, datetime):
168        dt: t.Optional[datetime] = value
169    elif isinstance(value, date):
170        dt = datetime(value.year, value.month, value.day)
171    elif isinstance(value, exp.Expr):
172        return to_datetime(value.name)
173    else:
174        try:
175            epoch = float(value)
176        except ValueError:
177            epoch = None
178
179        if epoch is None:
180            relative_base = relative_base or now()
181            expression = str(value)
182            if check_categorical_relative_expression and is_categorical_relative_expression(
183                expression
184            ):
185                relative_base = relative_base.replace(hour=0, minute=0, second=0, microsecond=0)
186
187            # note: we hardcode TIMEZONE: UTC to work around this bug: https://github.com/scrapinghub/dateparser/issues/896
188            # where dateparser just silently fails if it cant interpret the contents of /etc/localtime
189            # this works because SQLMesh only deals with UTC, there is no concept of user local time
190            dt = dateparser.parse(
191                expression, settings={"RELATIVE_BASE": relative_base, "TIMEZONE": "UTC"}
192            )
193        else:
194            try:
195                dt = datetime.strptime(str(value), DATE_INT_FMT)
196            except ValueError:
197                dt = datetime.fromtimestamp(epoch / 1000.0, tz=UTC)
198
199    if dt is None:
200        raise ValueError(f"Could not convert `{value}` to datetime.")
201
202    tz = tz or UTC
203
204    if dt.tzinfo:
205        return dt if dt.tzinfo == tz else dt.astimezone(tz)
206    return dt.replace(tzinfo=tz)
207
208
209def to_date(value: TimeLike, relative_base: t.Optional[datetime] = None) -> date:
210    """Converts a value into a UTC date object
211    Args:
212        value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
213        relative_base: The datetime to reference for time expressions that are using relative terms
214
215    Raises:
216        ValueError if value cannot be converted to a date.
217
218    Returns:
219        A date object with tz utc.
220    """
221    return to_datetime(value, relative_base).date()
222
223
224def date_dict(
225    execution_time: TimeLike,
226    start: t.Optional[TimeLike],
227    end: t.Optional[TimeLike],
228) -> t.Dict[str, TimeLike]:
229    """Creates a kwarg dictionary of datetime variables for use in SQL Contexts.
230
231    Keys are like start_date, start_ds, end_date, end_ds...
232
233    Args:
234        execution_time: Execution time.
235        start: Start time.
236        end: End time.
237
238    Returns:
239        A dictionary with various keys pointing to datetime formats.
240    """
241    kwargs: t.Dict[str, t.Union[str, datetime, date, float, int]] = {}
242
243    execution_dt = to_datetime(execution_time)
244    prefixes = [
245        ("latest", execution_dt),  # TODO: Preserved for backward compatibility. Remove in 1.0.0.
246        ("execution", execution_dt),
247    ]
248
249    if start is not None:
250        prefixes.append(("start", to_datetime(start)))
251    if end is not None:
252        prefixes.append(("end", to_datetime(end)))
253
254    for prefix, time_like in prefixes:
255        dt = to_datetime(time_like)
256        dtntz = dt.replace(tzinfo=None)
257
258        millis = to_timestamp(time_like)
259
260        kwargs[f"{prefix}_dt"] = dt
261        kwargs[f"{prefix}_dtntz"] = dtntz
262        kwargs[f"{prefix}_date"] = to_date(dt)
263        kwargs[f"{prefix}_ds"] = to_ds(time_like)
264        kwargs[f"{prefix}_ts"] = to_ts(dt)
265        kwargs[f"{prefix}_tstz"] = to_tstz(dt)
266        kwargs[f"{prefix}_epoch"] = millis / 1000
267        kwargs[f"{prefix}_millis"] = millis
268        kwargs[f"{prefix}_hour"] = dt.hour
269
270    return kwargs
271
272
273def to_ds(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str:
274    """Converts a TimeLike object into YYYY-MM-DD formatted string."""
275    return to_ts(obj, relative_base=relative_base)[0:10]
276
277
278def to_ts(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str:
279    """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string."""
280    return to_datetime(obj, relative_base=relative_base).replace(tzinfo=None).isoformat(sep=" ")
281
282
283def to_tstz(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str:
284    """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string."""
285    return to_datetime(obj, relative_base=relative_base).isoformat(sep=" ")
286
287
288def is_date(obj: TimeLike) -> bool:
289    """Checks if a TimeLike object should be treated like a date."""
290    if isinstance(obj, date) and not isinstance(obj, datetime):
291        return True
292
293    try:
294        time.strptime(str(obj).replace("-", ""), DATE_INT_FMT)
295        return True
296    except ValueError:
297        return False
298
299
300def make_inclusive(
301    start: TimeLike, end: TimeLike, dialect: t.Optional[DialectType] = ""
302) -> DatetimeRange:
303    """Adjust start and end times to to become inclusive datetimes.
304
305    SQLMesh treats start and end times as inclusive so that filters can be written as
306
307    SELECT * FROM x WHERE ds BETWEEN @start_ds AND @end_ds.
308    SELECT * FROM x WHERE ts BETWEEN @start_ts AND @end_ts.
309
310    In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if
311    start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps
312    are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is
313    treated as an exclusive range and converted to inclusive by subtracting 1 microsecond. If the dialect is
314    T-SQL then 1 nanoseconds is subtracted to account for the increased precision.
315
316    Args:
317        start: Start timelike object.
318        end: End timelike object.
319
320    Example:
321        >>> make_inclusive("2020-01-01", "2020-01-01")
322        (datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2020, 1, 1, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc))
323
324    Returns:
325        A tuple of inclusive datetime objects.
326    """
327    return (to_datetime(start), make_inclusive_end(end, dialect=dialect))
328
329
330def make_inclusive_end(end: TimeLike, dialect: t.Optional[DialectType] = "") -> datetime:
331    import pandas as pd
332
333    exclusive_end = make_exclusive(end)
334    if dialect == "tsql":
335        return to_utc_timestamp(exclusive_end) - pd.Timedelta(1, unit="ns")
336    return exclusive_end - timedelta(microseconds=1)
337
338
339def make_exclusive(time: TimeLike) -> datetime:
340    dt = to_datetime(time)
341    if is_date(time):
342        dt = dt + timedelta(days=1)
343    return dt
344
345
346def make_ts_exclusive(time: TimeLike, dialect: DialectType) -> datetime:
347    ts = to_datetime(time)
348    if dialect == "tsql":
349        return to_utc_timestamp(ts) - pd.Timedelta(1, unit="ns")
350    return ts + timedelta(microseconds=1)
351
352
353def to_utc_timestamp(time: datetime) -> pd.Timestamp:
354    import pandas as pd
355
356    if time.tzinfo is not None:
357        return pd.Timestamp(time).tz_convert("utc")
358    return pd.Timestamp(time, tz="utc")
359
360
361def validate_date_range(
362    start: t.Optional[TimeLike],
363    end: t.Optional[TimeLike],
364) -> None:
365    if start and end and to_datetime(start) > to_datetime(end):
366        raise ValueError(
367            f"Start date / time ({start}) can't be greater than end date / time ({end})"
368        )
369
370
371def time_like_to_str(time_like: TimeLike) -> str:
372    if isinstance(time_like, str):
373        return time_like
374    if is_date(time_like):
375        return to_ds(time_like)
376    return to_ts(time_like)
377
378
379def is_categorical_relative_expression(expression: str) -> bool:
380    if expression.strip().lower() in DAY_SHORTCUT_EXPRESSIONS:
381        return True
382    grain_kwargs = freshness_date_parser.get_kwargs(expression)
383    if not grain_kwargs:
384        return False
385    return not any(k in TIME_UNITS for k in grain_kwargs)
386
387
388def is_relative(value: TimeLike) -> bool:
389    """
390    Tests a TimeLike object to see if it is a relative expression, eg '1 week ago' as opposed to an absolute timestamp
391    """
392    if isinstance(value, str):
393        return is_categorical_relative_expression(value)
394
395    return False
396
397
398def to_time_column(
399    time_column: t.Union[TimeLike, exp.Null],
400    time_column_type: exp.DataType,
401    dialect: str,
402    time_column_format: t.Optional[str] = None,
403    nullable: bool = False,
404) -> exp.Expr:
405    """Convert a TimeLike object to the same time format and type as the model's time column."""
406    if dialect == "clickhouse" and time_column_type.is_type(
407        *(exp.DataType.TEMPORAL_TYPES - {exp.DataType.Type.DATE, exp.DataType.Type.DATE32})
408    ):
409        if time_column_type.is_type(exp.DataType.Type.DATETIME64):
410            if nullable:
411                time_column_type.set("nullable", nullable)
412        else:
413            # Clickhouse will error if we pass fractional seconds to DateTime, so we always
414            # use DateTime64 for timestamps.
415            #
416            # `datetime` objects have microsecond precision, so we specify the type precision as 6.
417            # If a timezone is present in the passed type object, it is included in the DateTime64 type
418            # via the `expressions` arg.
419            time_column_type = exp.DataType.build(
420                exp.DataType.Type.DATETIME64,
421                expressions=[
422                    exp.DataTypeParam(this=exp.Literal(this=6, is_string=False)),
423                    *time_column_type.expressions,
424                ],
425                nullable=nullable or time_column_type.args.get("nullable", False),
426            )
427
428    if isinstance(time_column, exp.Null):
429        return exp.cast(time_column, to=time_column_type)
430    if time_column_type.is_type(exp.DataType.Type.DATE, exp.DataType.Type.DATE32):
431        return exp.cast(exp.Literal.string(to_ds(time_column)), to="date")
432    if time_column_type.is_type(*TEMPORAL_TZ_TYPES):
433        return exp.cast(exp.Literal.string(to_tstz(time_column)), to=time_column_type)
434    if time_column_type.is_type(*exp.DataType.TEMPORAL_TYPES):
435        return exp.cast(exp.Literal.string(to_ts(time_column)), to=time_column_type)
436
437    if time_column_format:
438        time_column = to_datetime(time_column).strftime(time_column_format)
439    if time_column_type.is_type(*exp.DataType.TEXT_TYPES):
440        return exp.Literal.string(time_column)
441    if time_column_type.is_type(*exp.DataType.NUMERIC_TYPES):
442        return exp.Literal.number(time_column)
443    return exp.convert(time_column)
444
445
446def pandas_timestamp_to_pydatetime(
447    df: pd.DataFrame, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
448) -> pd.DataFrame:
449    import pandas as pd
450    from pandas.api.types import is_datetime64_any_dtype  # type: ignore
451
452    for column in df.columns:
453        if is_datetime64_any_dtype(df.dtypes[column]):
454            # We must use `pd.Series` and dtype or pandas will convert it back to pd.Timestamp during assignment
455            # https://stackoverflow.com/a/68961834/1707525
456            df[column] = pd.Series(df[column].dt.to_pydatetime(), dtype="object")
457
458            if columns_to_types and columns_to_types[column].this in (
459                exp.DataType.Type.DATE,
460                exp.DataType.Type.DATE32,
461            ):
462                # Sometimes `to_pydatetime()` has already converted to date, so we only extract from datetime objects.
463                df[column] = df[column].map(
464                    lambda x: x.date() if type(x) is datetime and not pd.isna(x) else x
465                )
466
467    return df
468
469
470def format_tz_datetime(
471    time: TimeLike,
472    format_string: t.Optional[str] = "%Y-%m-%d %I:%M%p %Z",
473    use_local_timezone: bool = False,
474) -> str:
475    output_datetime = to_datetime(time)
476    if use_local_timezone:
477        local_timezone = datetime.now().astimezone().tzinfo
478        output_datetime = output_datetime.astimezone(local_timezone)
479    return (
480        output_datetime.strftime(format_string)
481        if format_string
482        else output_datetime.isoformat(sep=" ")
483    )
UTC = datetime.timezone.utc
TimeLike = typing.Union[datetime.date, datetime.datetime, str, int, float]
DatetimeRange = typing.Tuple[datetime.datetime, datetime.datetime]
DatetimeRanges = typing.List[typing.Tuple[datetime.datetime, datetime.datetime]]
DATE_INT_FMT = '%Y%m%d'
DAY_SHORTCUT_EXPRESSIONS = {'today', 'yesterday', 'tomorrow'}
TIME_UNITS = {'hours', 'minutes', 'seconds'}
TEMPORAL_TZ_TYPES = {<DType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, <DType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, <DType.TIMETZ: 'TIMETZ'>}
def now(minute_floor: bool = True) -> datetime.datetime:
50def now(minute_floor: bool = True) -> datetime:
51    """
52    Current utc datetime with optional minute level accuracy / granularity.
53
54    minute_floor is set to True by default
55
56    Args:
57        minute_floor: If true (default), removes the second and microseconds from the current datetime.
58
59    Returns:
60        A datetime object with tz utc.
61    """
62    now = datetime.now(tz=UTC)
63    if minute_floor:
64        return now.replace(second=0, microsecond=0, tzinfo=UTC)
65    return now.replace(tzinfo=UTC)

Current utc datetime with optional minute level accuracy / granularity.

minute_floor is set to True by default

Arguments:
  • minute_floor: If true (default), removes the second and microseconds from the current datetime.
Returns:

A datetime object with tz utc.

def now_timestamp(minute_floor: bool = False) -> int:
68def now_timestamp(minute_floor: bool = False) -> int:
69    """
70    Current utc timestamp.
71
72    Args:
73        minute_floor: If true, removes the second and microseconds from the current datetime.
74
75    Returns:
76        UTC epoch millis timestamp
77    """
78    return to_timestamp(now(minute_floor))

Current utc timestamp.

Arguments:
  • minute_floor: If true, removes the second and microseconds from the current datetime.
Returns:

UTC epoch millis timestamp

def now_ds() -> str:
81def now_ds() -> str:
82    """
83    Current utc ds.
84
85    Returns:
86        Today's ds string.
87    """
88    return to_ds(now())

Current utc ds.

Returns:

Today's ds string.

def yesterday(relative_base: Optional[datetime.datetime] = None) -> datetime.datetime:
91def yesterday(relative_base: t.Optional[datetime] = None) -> datetime:
92    """
93    Yesterday utc datetime.
94
95    Returns:
96        A datetime object with tz utc representing yesterday's date
97    """
98    return to_datetime("yesterday", relative_base=relative_base)

Yesterday utc datetime.

Returns:

A datetime object with tz utc representing yesterday's date

def yesterday_ds(relative_base: Optional[datetime.datetime] = None) -> str:
101def yesterday_ds(relative_base: t.Optional[datetime] = None) -> str:
102    """
103    Yesterday utc ds.
104
105    Returns:
106        Yesterday's ds string.
107    """
108    return to_ds("yesterday", relative_base=relative_base)

Yesterday utc ds.

Returns:

Yesterday's ds string.

def yesterday_timestamp(relative_base: Optional[datetime.datetime] = None) -> int:
111def yesterday_timestamp(relative_base: t.Optional[datetime] = None) -> int:
112    """
113    Yesterday utc timestamp.
114
115    Returns:
116        UTC epoch millis timestamp of yesterday
117    """
118    return to_timestamp(yesterday(relative_base=relative_base))

Yesterday utc timestamp.

Returns:

UTC epoch millis timestamp of yesterday

def to_timestamp( value: Union[datetime.date, datetime.datetime, str, int, float], relative_base: Optional[datetime.datetime] = None, check_categorical_relative_expression: bool = True) -> int:
121def to_timestamp(
122    value: TimeLike,
123    relative_base: t.Optional[datetime] = None,
124    check_categorical_relative_expression: bool = True,
125) -> int:
126    """
127    Converts a value into an epoch millis timestamp.
128
129    Args:
130        value: A variety of date formats. If value is a string, it must be in iso format.
131        relative_base: The datetime to reference for time expressions that are using relative terms
132        check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
133
134    Returns:
135        Epoch millis timestamp.
136    """
137    return int(
138        to_datetime(
139            value,
140            relative_base=relative_base,
141            check_categorical_relative_expression=check_categorical_relative_expression,
142        ).timestamp()
143        * 1000
144    )

Converts a value into an epoch millis timestamp.

Arguments:
  • value: A variety of date formats. If value is a string, it must be in iso format.
  • relative_base: The datetime to reference for time expressions that are using relative terms
  • check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
Returns:

Epoch millis timestamp.

@ttl_cache()
def to_datetime( value: Union[datetime.date, datetime.datetime, str, int, float], relative_base: Optional[datetime.datetime] = None, check_categorical_relative_expression: bool = True, tz: Optional[datetime.tzinfo] = None) -> datetime.datetime:
147@ttl_cache()
148def to_datetime(
149    value: TimeLike,
150    relative_base: t.Optional[datetime] = None,
151    check_categorical_relative_expression: bool = True,
152    tz: t.Optional[tzinfo] = None,
153) -> datetime:
154    """Converts a value into a UTC datetime object.
155
156    Args:
157        value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
158        relative_base: The datetime to reference for time expressions that are using relative terms.
159        check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
160        tz: Timezone to convert datetime to, defaults to utc
161
162    Raises:
163        ValueError if value cannot be converted to a datetime.
164
165    Returns:
166        A datetime object with tz (default UTC).
167    """
168    if isinstance(value, datetime):
169        dt: t.Optional[datetime] = value
170    elif isinstance(value, date):
171        dt = datetime(value.year, value.month, value.day)
172    elif isinstance(value, exp.Expr):
173        return to_datetime(value.name)
174    else:
175        try:
176            epoch = float(value)
177        except ValueError:
178            epoch = None
179
180        if epoch is None:
181            relative_base = relative_base or now()
182            expression = str(value)
183            if check_categorical_relative_expression and is_categorical_relative_expression(
184                expression
185            ):
186                relative_base = relative_base.replace(hour=0, minute=0, second=0, microsecond=0)
187
188            # note: we hardcode TIMEZONE: UTC to work around this bug: https://github.com/scrapinghub/dateparser/issues/896
189            # where dateparser just silently fails if it cant interpret the contents of /etc/localtime
190            # this works because SQLMesh only deals with UTC, there is no concept of user local time
191            dt = dateparser.parse(
192                expression, settings={"RELATIVE_BASE": relative_base, "TIMEZONE": "UTC"}
193            )
194        else:
195            try:
196                dt = datetime.strptime(str(value), DATE_INT_FMT)
197            except ValueError:
198                dt = datetime.fromtimestamp(epoch / 1000.0, tz=UTC)
199
200    if dt is None:
201        raise ValueError(f"Could not convert `{value}` to datetime.")
202
203    tz = tz or UTC
204
205    if dt.tzinfo:
206        return dt if dt.tzinfo == tz else dt.astimezone(tz)
207    return dt.replace(tzinfo=tz)

Converts a value into a UTC datetime object.

Arguments:
  • value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
  • relative_base: The datetime to reference for time expressions that are using relative terms.
  • check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
  • tz: Timezone to convert datetime to, defaults to utc
Raises:
  • ValueError if value cannot be converted to a datetime.
Returns:

A datetime object with tz (default UTC).

def to_date( value: Union[datetime.date, datetime.datetime, str, int, float], relative_base: Optional[datetime.datetime] = None) -> datetime.date:
210def to_date(value: TimeLike, relative_base: t.Optional[datetime] = None) -> date:
211    """Converts a value into a UTC date object
212    Args:
213        value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
214        relative_base: The datetime to reference for time expressions that are using relative terms
215
216    Raises:
217        ValueError if value cannot be converted to a date.
218
219    Returns:
220        A date object with tz utc.
221    """
222    return to_datetime(value, relative_base).date()

Converts a value into a UTC date object

Arguments:
  • value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
  • relative_base: The datetime to reference for time expressions that are using relative terms
Raises:
  • ValueError if value cannot be converted to a date.
Returns:

A date object with tz utc.

def date_dict( execution_time: Union[datetime.date, datetime.datetime, str, int, float], start: Union[datetime.date, datetime.datetime, str, int, float, NoneType], end: Union[datetime.date, datetime.datetime, str, int, float, NoneType]) -> Dict[str, Union[datetime.date, datetime.datetime, str, int, float]]:
225def date_dict(
226    execution_time: TimeLike,
227    start: t.Optional[TimeLike],
228    end: t.Optional[TimeLike],
229) -> t.Dict[str, TimeLike]:
230    """Creates a kwarg dictionary of datetime variables for use in SQL Contexts.
231
232    Keys are like start_date, start_ds, end_date, end_ds...
233
234    Args:
235        execution_time: Execution time.
236        start: Start time.
237        end: End time.
238
239    Returns:
240        A dictionary with various keys pointing to datetime formats.
241    """
242    kwargs: t.Dict[str, t.Union[str, datetime, date, float, int]] = {}
243
244    execution_dt = to_datetime(execution_time)
245    prefixes = [
246        ("latest", execution_dt),  # TODO: Preserved for backward compatibility. Remove in 1.0.0.
247        ("execution", execution_dt),
248    ]
249
250    if start is not None:
251        prefixes.append(("start", to_datetime(start)))
252    if end is not None:
253        prefixes.append(("end", to_datetime(end)))
254
255    for prefix, time_like in prefixes:
256        dt = to_datetime(time_like)
257        dtntz = dt.replace(tzinfo=None)
258
259        millis = to_timestamp(time_like)
260
261        kwargs[f"{prefix}_dt"] = dt
262        kwargs[f"{prefix}_dtntz"] = dtntz
263        kwargs[f"{prefix}_date"] = to_date(dt)
264        kwargs[f"{prefix}_ds"] = to_ds(time_like)
265        kwargs[f"{prefix}_ts"] = to_ts(dt)
266        kwargs[f"{prefix}_tstz"] = to_tstz(dt)
267        kwargs[f"{prefix}_epoch"] = millis / 1000
268        kwargs[f"{prefix}_millis"] = millis
269        kwargs[f"{prefix}_hour"] = dt.hour
270
271    return kwargs

Creates a kwarg dictionary of datetime variables for use in SQL Contexts.

Keys are like start_date, start_ds, end_date, end_ds...

Arguments:
  • execution_time: Execution time.
  • start: Start time.
  • end: End time.
Returns:

A dictionary with various keys pointing to datetime formats.

def to_ds( obj: Union[datetime.date, datetime.datetime, str, int, float], relative_base: Optional[datetime.datetime] = None) -> str:
274def to_ds(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str:
275    """Converts a TimeLike object into YYYY-MM-DD formatted string."""
276    return to_ts(obj, relative_base=relative_base)[0:10]

Converts a TimeLike object into YYYY-MM-DD formatted string.

def to_ts( obj: Union[datetime.date, datetime.datetime, str, int, float], relative_base: Optional[datetime.datetime] = None) -> str:
279def to_ts(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str:
280    """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string."""
281    return to_datetime(obj, relative_base=relative_base).replace(tzinfo=None).isoformat(sep=" ")

Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string.

def to_tstz( obj: Union[datetime.date, datetime.datetime, str, int, float], relative_base: Optional[datetime.datetime] = None) -> str:
284def to_tstz(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str:
285    """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string."""
286    return to_datetime(obj, relative_base=relative_base).isoformat(sep=" ")

Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string.

def is_date(obj: Union[datetime.date, datetime.datetime, str, int, float]) -> bool:
289def is_date(obj: TimeLike) -> bool:
290    """Checks if a TimeLike object should be treated like a date."""
291    if isinstance(obj, date) and not isinstance(obj, datetime):
292        return True
293
294    try:
295        time.strptime(str(obj).replace("-", ""), DATE_INT_FMT)
296        return True
297    except ValueError:
298        return False

Checks if a TimeLike object should be treated like a date.

def make_inclusive( start: Union[datetime.date, datetime.datetime, str, int, float], end: Union[datetime.date, datetime.datetime, str, int, float], dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType] = '') -> Tuple[datetime.datetime, datetime.datetime]:
301def make_inclusive(
302    start: TimeLike, end: TimeLike, dialect: t.Optional[DialectType] = ""
303) -> DatetimeRange:
304    """Adjust start and end times to to become inclusive datetimes.
305
306    SQLMesh treats start and end times as inclusive so that filters can be written as
307
308    SELECT * FROM x WHERE ds BETWEEN @start_ds AND @end_ds.
309    SELECT * FROM x WHERE ts BETWEEN @start_ts AND @end_ts.
310
311    In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if
312    start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps
313    are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is
314    treated as an exclusive range and converted to inclusive by subtracting 1 microsecond. If the dialect is
315    T-SQL then 1 nanoseconds is subtracted to account for the increased precision.
316
317    Args:
318        start: Start timelike object.
319        end: End timelike object.
320
321    Example:
322        >>> make_inclusive("2020-01-01", "2020-01-01")
323        (datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2020, 1, 1, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc))
324
325    Returns:
326        A tuple of inclusive datetime objects.
327    """
328    return (to_datetime(start), make_inclusive_end(end, dialect=dialect))

Adjust start and end times to to become inclusive datetimes.

SQLMesh treats start and end times as inclusive so that filters can be written as

SELECT * FROM x WHERE ds BETWEEN @start_ds AND @end_ds. SELECT * FROM x WHERE ts BETWEEN @start_ts AND @end_ts.

In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is treated as an exclusive range and converted to inclusive by subtracting 1 microsecond. If the dialect is T-SQL then 1 nanoseconds is subtracted to account for the increased precision.

Arguments:
  • start: Start timelike object.
  • end: End timelike object.
Example:
>>> make_inclusive("2020-01-01", "2020-01-01")
(datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2020, 1, 1, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc))
Returns:

A tuple of inclusive datetime objects.

def make_inclusive_end( end: Union[datetime.date, datetime.datetime, str, int, float], dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType] = '') -> datetime.datetime:
331def make_inclusive_end(end: TimeLike, dialect: t.Optional[DialectType] = "") -> datetime:
332    import pandas as pd
333
334    exclusive_end = make_exclusive(end)
335    if dialect == "tsql":
336        return to_utc_timestamp(exclusive_end) - pd.Timedelta(1, unit="ns")
337    return exclusive_end - timedelta(microseconds=1)
def make_exclusive( time: Union[datetime.date, datetime.datetime, str, int, float]) -> datetime.datetime:
340def make_exclusive(time: TimeLike) -> datetime:
341    dt = to_datetime(time)
342    if is_date(time):
343        dt = dt + timedelta(days=1)
344    return dt
def make_ts_exclusive( time: Union[datetime.date, datetime.datetime, str, int, float], dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> datetime.datetime:
347def make_ts_exclusive(time: TimeLike, dialect: DialectType) -> datetime:
348    ts = to_datetime(time)
349    if dialect == "tsql":
350        return to_utc_timestamp(ts) - pd.Timedelta(1, unit="ns")
351    return ts + timedelta(microseconds=1)
def to_utc_timestamp(time: datetime.datetime) -> pandas._libs.tslibs.timestamps.Timestamp:
354def to_utc_timestamp(time: datetime) -> pd.Timestamp:
355    import pandas as pd
356
357    if time.tzinfo is not None:
358        return pd.Timestamp(time).tz_convert("utc")
359    return pd.Timestamp(time, tz="utc")
def validate_date_range( start: Union[datetime.date, datetime.datetime, str, int, float, NoneType], end: Union[datetime.date, datetime.datetime, str, int, float, NoneType]) -> None:
362def validate_date_range(
363    start: t.Optional[TimeLike],
364    end: t.Optional[TimeLike],
365) -> None:
366    if start and end and to_datetime(start) > to_datetime(end):
367        raise ValueError(
368            f"Start date / time ({start}) can't be greater than end date / time ({end})"
369        )
def time_like_to_str( time_like: Union[datetime.date, datetime.datetime, str, int, float]) -> str:
372def time_like_to_str(time_like: TimeLike) -> str:
373    if isinstance(time_like, str):
374        return time_like
375    if is_date(time_like):
376        return to_ds(time_like)
377    return to_ts(time_like)
def is_categorical_relative_expression(expression: str) -> bool:
380def is_categorical_relative_expression(expression: str) -> bool:
381    if expression.strip().lower() in DAY_SHORTCUT_EXPRESSIONS:
382        return True
383    grain_kwargs = freshness_date_parser.get_kwargs(expression)
384    if not grain_kwargs:
385        return False
386    return not any(k in TIME_UNITS for k in grain_kwargs)
def is_relative(value: Union[datetime.date, datetime.datetime, str, int, float]) -> bool:
389def is_relative(value: TimeLike) -> bool:
390    """
391    Tests a TimeLike object to see if it is a relative expression, eg '1 week ago' as opposed to an absolute timestamp
392    """
393    if isinstance(value, str):
394        return is_categorical_relative_expression(value)
395
396    return False

Tests a TimeLike object to see if it is a relative expression, eg '1 week ago' as opposed to an absolute timestamp

def to_time_column( time_column: Union[datetime.date, datetime.datetime, str, int, float, sqlglot.expressions.core.Null], time_column_type: sqlglot.expressions.datatypes.DataType, dialect: str, time_column_format: Optional[str] = None, nullable: bool = False) -> sqlglot.expressions.core.Expr:
399def to_time_column(
400    time_column: t.Union[TimeLike, exp.Null],
401    time_column_type: exp.DataType,
402    dialect: str,
403    time_column_format: t.Optional[str] = None,
404    nullable: bool = False,
405) -> exp.Expr:
406    """Convert a TimeLike object to the same time format and type as the model's time column."""
407    if dialect == "clickhouse" and time_column_type.is_type(
408        *(exp.DataType.TEMPORAL_TYPES - {exp.DataType.Type.DATE, exp.DataType.Type.DATE32})
409    ):
410        if time_column_type.is_type(exp.DataType.Type.DATETIME64):
411            if nullable:
412                time_column_type.set("nullable", nullable)
413        else:
414            # Clickhouse will error if we pass fractional seconds to DateTime, so we always
415            # use DateTime64 for timestamps.
416            #
417            # `datetime` objects have microsecond precision, so we specify the type precision as 6.
418            # If a timezone is present in the passed type object, it is included in the DateTime64 type
419            # via the `expressions` arg.
420            time_column_type = exp.DataType.build(
421                exp.DataType.Type.DATETIME64,
422                expressions=[
423                    exp.DataTypeParam(this=exp.Literal(this=6, is_string=False)),
424                    *time_column_type.expressions,
425                ],
426                nullable=nullable or time_column_type.args.get("nullable", False),
427            )
428
429    if isinstance(time_column, exp.Null):
430        return exp.cast(time_column, to=time_column_type)
431    if time_column_type.is_type(exp.DataType.Type.DATE, exp.DataType.Type.DATE32):
432        return exp.cast(exp.Literal.string(to_ds(time_column)), to="date")
433    if time_column_type.is_type(*TEMPORAL_TZ_TYPES):
434        return exp.cast(exp.Literal.string(to_tstz(time_column)), to=time_column_type)
435    if time_column_type.is_type(*exp.DataType.TEMPORAL_TYPES):
436        return exp.cast(exp.Literal.string(to_ts(time_column)), to=time_column_type)
437
438    if time_column_format:
439        time_column = to_datetime(time_column).strftime(time_column_format)
440    if time_column_type.is_type(*exp.DataType.TEXT_TYPES):
441        return exp.Literal.string(time_column)
442    if time_column_type.is_type(*exp.DataType.NUMERIC_TYPES):
443        return exp.Literal.number(time_column)
444    return exp.convert(time_column)

Convert a TimeLike object to the same time format and type as the model's time column.

def pandas_timestamp_to_pydatetime( df: pandas.core.frame.DataFrame, columns_to_types: Optional[Dict[str, sqlglot.expressions.datatypes.DataType]] = None) -> pandas.core.frame.DataFrame:
447def pandas_timestamp_to_pydatetime(
448    df: pd.DataFrame, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None
449) -> pd.DataFrame:
450    import pandas as pd
451    from pandas.api.types import is_datetime64_any_dtype  # type: ignore
452
453    for column in df.columns:
454        if is_datetime64_any_dtype(df.dtypes[column]):
455            # We must use `pd.Series` and dtype or pandas will convert it back to pd.Timestamp during assignment
456            # https://stackoverflow.com/a/68961834/1707525
457            df[column] = pd.Series(df[column].dt.to_pydatetime(), dtype="object")
458
459            if columns_to_types and columns_to_types[column].this in (
460                exp.DataType.Type.DATE,
461                exp.DataType.Type.DATE32,
462            ):
463                # Sometimes `to_pydatetime()` has already converted to date, so we only extract from datetime objects.
464                df[column] = df[column].map(
465                    lambda x: x.date() if type(x) is datetime and not pd.isna(x) else x
466                )
467
468    return df
def format_tz_datetime( time: Union[datetime.date, datetime.datetime, str, int, float], format_string: Optional[str] = '%Y-%m-%d %I:%M%p %Z', use_local_timezone: bool = False) -> str:
471def format_tz_datetime(
472    time: TimeLike,
473    format_string: t.Optional[str] = "%Y-%m-%d %I:%M%p %Z",
474    use_local_timezone: bool = False,
475) -> str:
476    output_datetime = to_datetime(time)
477    if use_local_timezone:
478        local_timezone = datetime.now().astimezone().tzinfo
479        output_datetime = output_datetime.astimezone(local_timezone)
480    return (
481        output_datetime.strftime(format_string)
482        if format_string
483        else output_datetime.isoformat(sep=" ")
484    )