sqlmesh.utils.date
1from __future__ import annotations 2 3import re 4import time 5import typing as t 6import warnings 7 8from datetime import date, datetime, timedelta, timezone, tzinfo 9 10import dateparser 11from dateparser import freshness_date_parser as freshness_date_parser_module 12from dateparser.freshness_date_parser import freshness_date_parser 13from sqlglot import exp 14 15from sqlmesh.utils import ttl_cache 16 17if t.TYPE_CHECKING: 18 import pandas as pd 19 20 from sqlglot.dialects.dialect import DialectType 21 22UTC = timezone.utc 23TimeLike = t.Union[date, datetime, str, int, float] 24DatetimeRange = t.Tuple[datetime, datetime] 25DatetimeRanges = t.List[DatetimeRange] 26DATE_INT_FMT = "%Y%m%d" 27 28 29warnings.filterwarnings( 30 "ignore", 31 message="The localize method is no longer necessary, as this time zone supports the fold attribute", 32) 33 34 35# The Freshness Date Data Parser doesn't support plural units so we add the `s?` to the expression 36freshness_date_parser_module.PATTERN = re.compile( # type: ignore 37 r"(\d+[.,]?\d*)\s*(%s)s?\b" % freshness_date_parser_module._UNITS, # type: ignore 38 re.I | re.S | re.U, # type: ignore 39) 40DAY_SHORTCUT_EXPRESSIONS = {"today", "yesterday", "tomorrow"} 41TIME_UNITS = {"hours", "minutes", "seconds"} 42TEMPORAL_TZ_TYPES = { 43 exp.DataType.Type.TIMETZ, 44 exp.DataType.Type.TIMESTAMPTZ, 45 exp.DataType.Type.TIMESTAMPLTZ, 46} 47 48 49def now(minute_floor: bool = True) -> datetime: 50 """ 51 Current utc datetime with optional minute level accuracy / granularity. 52 53 minute_floor is set to True by default 54 55 Args: 56 minute_floor: If true (default), removes the second and microseconds from the current datetime. 57 58 Returns: 59 A datetime object with tz utc. 60 """ 61 now = datetime.now(tz=UTC) 62 if minute_floor: 63 return now.replace(second=0, microsecond=0, tzinfo=UTC) 64 return now.replace(tzinfo=UTC) 65 66 67def now_timestamp(minute_floor: bool = False) -> int: 68 """ 69 Current utc timestamp. 70 71 Args: 72 minute_floor: If true, removes the second and microseconds from the current datetime. 73 74 Returns: 75 UTC epoch millis timestamp 76 """ 77 return to_timestamp(now(minute_floor)) 78 79 80def now_ds() -> str: 81 """ 82 Current utc ds. 83 84 Returns: 85 Today's ds string. 86 """ 87 return to_ds(now()) 88 89 90def yesterday(relative_base: t.Optional[datetime] = None) -> datetime: 91 """ 92 Yesterday utc datetime. 93 94 Returns: 95 A datetime object with tz utc representing yesterday's date 96 """ 97 return to_datetime("yesterday", relative_base=relative_base) 98 99 100def yesterday_ds(relative_base: t.Optional[datetime] = None) -> str: 101 """ 102 Yesterday utc ds. 103 104 Returns: 105 Yesterday's ds string. 106 """ 107 return to_ds("yesterday", relative_base=relative_base) 108 109 110def yesterday_timestamp(relative_base: t.Optional[datetime] = None) -> int: 111 """ 112 Yesterday utc timestamp. 113 114 Returns: 115 UTC epoch millis timestamp of yesterday 116 """ 117 return to_timestamp(yesterday(relative_base=relative_base)) 118 119 120def to_timestamp( 121 value: TimeLike, 122 relative_base: t.Optional[datetime] = None, 123 check_categorical_relative_expression: bool = True, 124) -> int: 125 """ 126 Converts a value into an epoch millis timestamp. 127 128 Args: 129 value: A variety of date formats. If value is a string, it must be in iso format. 130 relative_base: The datetime to reference for time expressions that are using relative terms 131 check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical. 132 133 Returns: 134 Epoch millis timestamp. 135 """ 136 return int( 137 to_datetime( 138 value, 139 relative_base=relative_base, 140 check_categorical_relative_expression=check_categorical_relative_expression, 141 ).timestamp() 142 * 1000 143 ) 144 145 146@ttl_cache() 147def to_datetime( 148 value: TimeLike, 149 relative_base: t.Optional[datetime] = None, 150 check_categorical_relative_expression: bool = True, 151 tz: t.Optional[tzinfo] = None, 152) -> datetime: 153 """Converts a value into a UTC datetime object. 154 155 Args: 156 value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs. 157 relative_base: The datetime to reference for time expressions that are using relative terms. 158 check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical. 159 tz: Timezone to convert datetime to, defaults to utc 160 161 Raises: 162 ValueError if value cannot be converted to a datetime. 163 164 Returns: 165 A datetime object with tz (default UTC). 166 """ 167 if isinstance(value, datetime): 168 dt: t.Optional[datetime] = value 169 elif isinstance(value, date): 170 dt = datetime(value.year, value.month, value.day) 171 elif isinstance(value, exp.Expr): 172 return to_datetime(value.name) 173 else: 174 try: 175 epoch = float(value) 176 except ValueError: 177 epoch = None 178 179 if epoch is None: 180 relative_base = relative_base or now() 181 expression = str(value) 182 if check_categorical_relative_expression and is_categorical_relative_expression( 183 expression 184 ): 185 relative_base = relative_base.replace(hour=0, minute=0, second=0, microsecond=0) 186 187 # note: we hardcode TIMEZONE: UTC to work around this bug: https://github.com/scrapinghub/dateparser/issues/896 188 # where dateparser just silently fails if it cant interpret the contents of /etc/localtime 189 # this works because SQLMesh only deals with UTC, there is no concept of user local time 190 dt = dateparser.parse( 191 expression, settings={"RELATIVE_BASE": relative_base, "TIMEZONE": "UTC"} 192 ) 193 else: 194 try: 195 dt = datetime.strptime(str(value), DATE_INT_FMT) 196 except ValueError: 197 dt = datetime.fromtimestamp(epoch / 1000.0, tz=UTC) 198 199 if dt is None: 200 raise ValueError(f"Could not convert `{value}` to datetime.") 201 202 tz = tz or UTC 203 204 if dt.tzinfo: 205 return dt if dt.tzinfo == tz else dt.astimezone(tz) 206 return dt.replace(tzinfo=tz) 207 208 209def to_date(value: TimeLike, relative_base: t.Optional[datetime] = None) -> date: 210 """Converts a value into a UTC date object 211 Args: 212 value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs. 213 relative_base: The datetime to reference for time expressions that are using relative terms 214 215 Raises: 216 ValueError if value cannot be converted to a date. 217 218 Returns: 219 A date object with tz utc. 220 """ 221 return to_datetime(value, relative_base).date() 222 223 224def date_dict( 225 execution_time: TimeLike, 226 start: t.Optional[TimeLike], 227 end: t.Optional[TimeLike], 228) -> t.Dict[str, TimeLike]: 229 """Creates a kwarg dictionary of datetime variables for use in SQL Contexts. 230 231 Keys are like start_date, start_ds, end_date, end_ds... 232 233 Args: 234 execution_time: Execution time. 235 start: Start time. 236 end: End time. 237 238 Returns: 239 A dictionary with various keys pointing to datetime formats. 240 """ 241 kwargs: t.Dict[str, t.Union[str, datetime, date, float, int]] = {} 242 243 execution_dt = to_datetime(execution_time) 244 prefixes = [ 245 ("latest", execution_dt), # TODO: Preserved for backward compatibility. Remove in 1.0.0. 246 ("execution", execution_dt), 247 ] 248 249 if start is not None: 250 prefixes.append(("start", to_datetime(start))) 251 if end is not None: 252 prefixes.append(("end", to_datetime(end))) 253 254 for prefix, time_like in prefixes: 255 dt = to_datetime(time_like) 256 dtntz = dt.replace(tzinfo=None) 257 258 millis = to_timestamp(time_like) 259 260 kwargs[f"{prefix}_dt"] = dt 261 kwargs[f"{prefix}_dtntz"] = dtntz 262 kwargs[f"{prefix}_date"] = to_date(dt) 263 kwargs[f"{prefix}_ds"] = to_ds(time_like) 264 kwargs[f"{prefix}_ts"] = to_ts(dt) 265 kwargs[f"{prefix}_tstz"] = to_tstz(dt) 266 kwargs[f"{prefix}_epoch"] = millis / 1000 267 kwargs[f"{prefix}_millis"] = millis 268 kwargs[f"{prefix}_hour"] = dt.hour 269 270 return kwargs 271 272 273def to_ds(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str: 274 """Converts a TimeLike object into YYYY-MM-DD formatted string.""" 275 return to_ts(obj, relative_base=relative_base)[0:10] 276 277 278def to_ts(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str: 279 """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string.""" 280 return to_datetime(obj, relative_base=relative_base).replace(tzinfo=None).isoformat(sep=" ") 281 282 283def to_tstz(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str: 284 """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string.""" 285 return to_datetime(obj, relative_base=relative_base).isoformat(sep=" ") 286 287 288def is_date(obj: TimeLike) -> bool: 289 """Checks if a TimeLike object should be treated like a date.""" 290 if isinstance(obj, date) and not isinstance(obj, datetime): 291 return True 292 293 try: 294 time.strptime(str(obj).replace("-", ""), DATE_INT_FMT) 295 return True 296 except ValueError: 297 return False 298 299 300def make_inclusive( 301 start: TimeLike, end: TimeLike, dialect: t.Optional[DialectType] = "" 302) -> DatetimeRange: 303 """Adjust start and end times to to become inclusive datetimes. 304 305 SQLMesh treats start and end times as inclusive so that filters can be written as 306 307 SELECT * FROM x WHERE ds BETWEEN @start_ds AND @end_ds. 308 SELECT * FROM x WHERE ts BETWEEN @start_ts AND @end_ts. 309 310 In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if 311 start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps 312 are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is 313 treated as an exclusive range and converted to inclusive by subtracting 1 microsecond. If the dialect is 314 T-SQL then 1 nanoseconds is subtracted to account for the increased precision. 315 316 Args: 317 start: Start timelike object. 318 end: End timelike object. 319 320 Example: 321 >>> make_inclusive("2020-01-01", "2020-01-01") 322 (datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2020, 1, 1, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)) 323 324 Returns: 325 A tuple of inclusive datetime objects. 326 """ 327 return (to_datetime(start), make_inclusive_end(end, dialect=dialect)) 328 329 330def make_inclusive_end(end: TimeLike, dialect: t.Optional[DialectType] = "") -> datetime: 331 import pandas as pd 332 333 exclusive_end = make_exclusive(end) 334 if dialect == "tsql": 335 return to_utc_timestamp(exclusive_end) - pd.Timedelta(1, unit="ns") 336 return exclusive_end - timedelta(microseconds=1) 337 338 339def make_exclusive(time: TimeLike) -> datetime: 340 dt = to_datetime(time) 341 if is_date(time): 342 dt = dt + timedelta(days=1) 343 return dt 344 345 346def make_ts_exclusive(time: TimeLike, dialect: DialectType) -> datetime: 347 ts = to_datetime(time) 348 if dialect == "tsql": 349 return to_utc_timestamp(ts) - pd.Timedelta(1, unit="ns") 350 return ts + timedelta(microseconds=1) 351 352 353def to_utc_timestamp(time: datetime) -> pd.Timestamp: 354 import pandas as pd 355 356 if time.tzinfo is not None: 357 return pd.Timestamp(time).tz_convert("utc") 358 return pd.Timestamp(time, tz="utc") 359 360 361def validate_date_range( 362 start: t.Optional[TimeLike], 363 end: t.Optional[TimeLike], 364) -> None: 365 if start and end and to_datetime(start) > to_datetime(end): 366 raise ValueError( 367 f"Start date / time ({start}) can't be greater than end date / time ({end})" 368 ) 369 370 371def time_like_to_str(time_like: TimeLike) -> str: 372 if isinstance(time_like, str): 373 return time_like 374 if is_date(time_like): 375 return to_ds(time_like) 376 return to_ts(time_like) 377 378 379def is_categorical_relative_expression(expression: str) -> bool: 380 if expression.strip().lower() in DAY_SHORTCUT_EXPRESSIONS: 381 return True 382 grain_kwargs = freshness_date_parser.get_kwargs(expression) 383 if not grain_kwargs: 384 return False 385 return not any(k in TIME_UNITS for k in grain_kwargs) 386 387 388def is_relative(value: TimeLike) -> bool: 389 """ 390 Tests a TimeLike object to see if it is a relative expression, eg '1 week ago' as opposed to an absolute timestamp 391 """ 392 if isinstance(value, str): 393 return is_categorical_relative_expression(value) 394 395 return False 396 397 398def to_time_column( 399 time_column: t.Union[TimeLike, exp.Null], 400 time_column_type: exp.DataType, 401 dialect: str, 402 time_column_format: t.Optional[str] = None, 403 nullable: bool = False, 404) -> exp.Expr: 405 """Convert a TimeLike object to the same time format and type as the model's time column.""" 406 if dialect == "clickhouse" and time_column_type.is_type( 407 *(exp.DataType.TEMPORAL_TYPES - {exp.DataType.Type.DATE, exp.DataType.Type.DATE32}) 408 ): 409 if time_column_type.is_type(exp.DataType.Type.DATETIME64): 410 if nullable: 411 time_column_type.set("nullable", nullable) 412 else: 413 # Clickhouse will error if we pass fractional seconds to DateTime, so we always 414 # use DateTime64 for timestamps. 415 # 416 # `datetime` objects have microsecond precision, so we specify the type precision as 6. 417 # If a timezone is present in the passed type object, it is included in the DateTime64 type 418 # via the `expressions` arg. 419 time_column_type = exp.DataType.build( 420 exp.DataType.Type.DATETIME64, 421 expressions=[ 422 exp.DataTypeParam(this=exp.Literal(this=6, is_string=False)), 423 *time_column_type.expressions, 424 ], 425 nullable=nullable or time_column_type.args.get("nullable", False), 426 ) 427 428 if isinstance(time_column, exp.Null): 429 return exp.cast(time_column, to=time_column_type) 430 if time_column_type.is_type(exp.DataType.Type.DATE, exp.DataType.Type.DATE32): 431 return exp.cast(exp.Literal.string(to_ds(time_column)), to="date") 432 if time_column_type.is_type(*TEMPORAL_TZ_TYPES): 433 return exp.cast(exp.Literal.string(to_tstz(time_column)), to=time_column_type) 434 if time_column_type.is_type(*exp.DataType.TEMPORAL_TYPES): 435 return exp.cast(exp.Literal.string(to_ts(time_column)), to=time_column_type) 436 437 if time_column_format: 438 time_column = to_datetime(time_column).strftime(time_column_format) 439 if time_column_type.is_type(*exp.DataType.TEXT_TYPES): 440 return exp.Literal.string(time_column) 441 if time_column_type.is_type(*exp.DataType.NUMERIC_TYPES): 442 return exp.Literal.number(time_column) 443 return exp.convert(time_column) 444 445 446def pandas_timestamp_to_pydatetime( 447 df: pd.DataFrame, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None 448) -> pd.DataFrame: 449 import pandas as pd 450 from pandas.api.types import is_datetime64_any_dtype # type: ignore 451 452 for column in df.columns: 453 if is_datetime64_any_dtype(df.dtypes[column]): 454 # We must use `pd.Series` and dtype or pandas will convert it back to pd.Timestamp during assignment 455 # https://stackoverflow.com/a/68961834/1707525 456 df[column] = pd.Series(df[column].dt.to_pydatetime(), dtype="object") 457 458 if columns_to_types and columns_to_types[column].this in ( 459 exp.DataType.Type.DATE, 460 exp.DataType.Type.DATE32, 461 ): 462 # Sometimes `to_pydatetime()` has already converted to date, so we only extract from datetime objects. 463 df[column] = df[column].map( 464 lambda x: x.date() if type(x) is datetime and not pd.isna(x) else x 465 ) 466 467 return df 468 469 470def format_tz_datetime( 471 time: TimeLike, 472 format_string: t.Optional[str] = "%Y-%m-%d %I:%M%p %Z", 473 use_local_timezone: bool = False, 474) -> str: 475 output_datetime = to_datetime(time) 476 if use_local_timezone: 477 local_timezone = datetime.now().astimezone().tzinfo 478 output_datetime = output_datetime.astimezone(local_timezone) 479 return ( 480 output_datetime.strftime(format_string) 481 if format_string 482 else output_datetime.isoformat(sep=" ") 483 )
50def now(minute_floor: bool = True) -> datetime: 51 """ 52 Current utc datetime with optional minute level accuracy / granularity. 53 54 minute_floor is set to True by default 55 56 Args: 57 minute_floor: If true (default), removes the second and microseconds from the current datetime. 58 59 Returns: 60 A datetime object with tz utc. 61 """ 62 now = datetime.now(tz=UTC) 63 if minute_floor: 64 return now.replace(second=0, microsecond=0, tzinfo=UTC) 65 return now.replace(tzinfo=UTC)
Current utc datetime with optional minute level accuracy / granularity.
minute_floor is set to True by default
Arguments:
- minute_floor: If true (default), removes the second and microseconds from the current datetime.
Returns:
A datetime object with tz utc.
68def now_timestamp(minute_floor: bool = False) -> int: 69 """ 70 Current utc timestamp. 71 72 Args: 73 minute_floor: If true, removes the second and microseconds from the current datetime. 74 75 Returns: 76 UTC epoch millis timestamp 77 """ 78 return to_timestamp(now(minute_floor))
Current utc timestamp.
Arguments:
- minute_floor: If true, removes the second and microseconds from the current datetime.
Returns:
UTC epoch millis timestamp
81def now_ds() -> str: 82 """ 83 Current utc ds. 84 85 Returns: 86 Today's ds string. 87 """ 88 return to_ds(now())
Current utc ds.
Returns:
Today's ds string.
91def yesterday(relative_base: t.Optional[datetime] = None) -> datetime: 92 """ 93 Yesterday utc datetime. 94 95 Returns: 96 A datetime object with tz utc representing yesterday's date 97 """ 98 return to_datetime("yesterday", relative_base=relative_base)
Yesterday utc datetime.
Returns:
A datetime object with tz utc representing yesterday's date
101def yesterday_ds(relative_base: t.Optional[datetime] = None) -> str: 102 """ 103 Yesterday utc ds. 104 105 Returns: 106 Yesterday's ds string. 107 """ 108 return to_ds("yesterday", relative_base=relative_base)
Yesterday utc ds.
Returns:
Yesterday's ds string.
111def yesterday_timestamp(relative_base: t.Optional[datetime] = None) -> int: 112 """ 113 Yesterday utc timestamp. 114 115 Returns: 116 UTC epoch millis timestamp of yesterday 117 """ 118 return to_timestamp(yesterday(relative_base=relative_base))
Yesterday utc timestamp.
Returns:
UTC epoch millis timestamp of yesterday
121def to_timestamp( 122 value: TimeLike, 123 relative_base: t.Optional[datetime] = None, 124 check_categorical_relative_expression: bool = True, 125) -> int: 126 """ 127 Converts a value into an epoch millis timestamp. 128 129 Args: 130 value: A variety of date formats. If value is a string, it must be in iso format. 131 relative_base: The datetime to reference for time expressions that are using relative terms 132 check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical. 133 134 Returns: 135 Epoch millis timestamp. 136 """ 137 return int( 138 to_datetime( 139 value, 140 relative_base=relative_base, 141 check_categorical_relative_expression=check_categorical_relative_expression, 142 ).timestamp() 143 * 1000 144 )
Converts a value into an epoch millis timestamp.
Arguments:
- value: A variety of date formats. If value is a string, it must be in iso format.
- relative_base: The datetime to reference for time expressions that are using relative terms
- check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
Returns:
Epoch millis timestamp.
147@ttl_cache() 148def to_datetime( 149 value: TimeLike, 150 relative_base: t.Optional[datetime] = None, 151 check_categorical_relative_expression: bool = True, 152 tz: t.Optional[tzinfo] = None, 153) -> datetime: 154 """Converts a value into a UTC datetime object. 155 156 Args: 157 value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs. 158 relative_base: The datetime to reference for time expressions that are using relative terms. 159 check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical. 160 tz: Timezone to convert datetime to, defaults to utc 161 162 Raises: 163 ValueError if value cannot be converted to a datetime. 164 165 Returns: 166 A datetime object with tz (default UTC). 167 """ 168 if isinstance(value, datetime): 169 dt: t.Optional[datetime] = value 170 elif isinstance(value, date): 171 dt = datetime(value.year, value.month, value.day) 172 elif isinstance(value, exp.Expr): 173 return to_datetime(value.name) 174 else: 175 try: 176 epoch = float(value) 177 except ValueError: 178 epoch = None 179 180 if epoch is None: 181 relative_base = relative_base or now() 182 expression = str(value) 183 if check_categorical_relative_expression and is_categorical_relative_expression( 184 expression 185 ): 186 relative_base = relative_base.replace(hour=0, minute=0, second=0, microsecond=0) 187 188 # note: we hardcode TIMEZONE: UTC to work around this bug: https://github.com/scrapinghub/dateparser/issues/896 189 # where dateparser just silently fails if it cant interpret the contents of /etc/localtime 190 # this works because SQLMesh only deals with UTC, there is no concept of user local time 191 dt = dateparser.parse( 192 expression, settings={"RELATIVE_BASE": relative_base, "TIMEZONE": "UTC"} 193 ) 194 else: 195 try: 196 dt = datetime.strptime(str(value), DATE_INT_FMT) 197 except ValueError: 198 dt = datetime.fromtimestamp(epoch / 1000.0, tz=UTC) 199 200 if dt is None: 201 raise ValueError(f"Could not convert `{value}` to datetime.") 202 203 tz = tz or UTC 204 205 if dt.tzinfo: 206 return dt if dt.tzinfo == tz else dt.astimezone(tz) 207 return dt.replace(tzinfo=tz)
Converts a value into a UTC datetime object.
Arguments:
- value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
- relative_base: The datetime to reference for time expressions that are using relative terms.
- check_categorical_relative_expression: If True, takes into account the relative expressions that are categorical.
- tz: Timezone to convert datetime to, defaults to utc
Raises:
- ValueError if value cannot be converted to a datetime.
Returns:
A datetime object with tz (default UTC).
210def to_date(value: TimeLike, relative_base: t.Optional[datetime] = None) -> date: 211 """Converts a value into a UTC date object 212 Args: 213 value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs. 214 relative_base: The datetime to reference for time expressions that are using relative terms 215 216 Raises: 217 ValueError if value cannot be converted to a date. 218 219 Returns: 220 A date object with tz utc. 221 """ 222 return to_datetime(value, relative_base).date()
Converts a value into a UTC date object
Arguments:
- value: A variety of date formats. If the value is number-like, it is assumed to be millisecond epochs.
- relative_base: The datetime to reference for time expressions that are using relative terms
Raises:
- ValueError if value cannot be converted to a date.
Returns:
A date object with tz utc.
225def date_dict( 226 execution_time: TimeLike, 227 start: t.Optional[TimeLike], 228 end: t.Optional[TimeLike], 229) -> t.Dict[str, TimeLike]: 230 """Creates a kwarg dictionary of datetime variables for use in SQL Contexts. 231 232 Keys are like start_date, start_ds, end_date, end_ds... 233 234 Args: 235 execution_time: Execution time. 236 start: Start time. 237 end: End time. 238 239 Returns: 240 A dictionary with various keys pointing to datetime formats. 241 """ 242 kwargs: t.Dict[str, t.Union[str, datetime, date, float, int]] = {} 243 244 execution_dt = to_datetime(execution_time) 245 prefixes = [ 246 ("latest", execution_dt), # TODO: Preserved for backward compatibility. Remove in 1.0.0. 247 ("execution", execution_dt), 248 ] 249 250 if start is not None: 251 prefixes.append(("start", to_datetime(start))) 252 if end is not None: 253 prefixes.append(("end", to_datetime(end))) 254 255 for prefix, time_like in prefixes: 256 dt = to_datetime(time_like) 257 dtntz = dt.replace(tzinfo=None) 258 259 millis = to_timestamp(time_like) 260 261 kwargs[f"{prefix}_dt"] = dt 262 kwargs[f"{prefix}_dtntz"] = dtntz 263 kwargs[f"{prefix}_date"] = to_date(dt) 264 kwargs[f"{prefix}_ds"] = to_ds(time_like) 265 kwargs[f"{prefix}_ts"] = to_ts(dt) 266 kwargs[f"{prefix}_tstz"] = to_tstz(dt) 267 kwargs[f"{prefix}_epoch"] = millis / 1000 268 kwargs[f"{prefix}_millis"] = millis 269 kwargs[f"{prefix}_hour"] = dt.hour 270 271 return kwargs
Creates a kwarg dictionary of datetime variables for use in SQL Contexts.
Keys are like start_date, start_ds, end_date, end_ds...
Arguments:
- execution_time: Execution time.
- start: Start time.
- end: End time.
Returns:
A dictionary with various keys pointing to datetime formats.
274def to_ds(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str: 275 """Converts a TimeLike object into YYYY-MM-DD formatted string.""" 276 return to_ts(obj, relative_base=relative_base)[0:10]
Converts a TimeLike object into YYYY-MM-DD formatted string.
279def to_ts(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str: 280 """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string.""" 281 return to_datetime(obj, relative_base=relative_base).replace(tzinfo=None).isoformat(sep=" ")
Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string.
284def to_tstz(obj: TimeLike, relative_base: t.Optional[datetime] = None) -> str: 285 """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string.""" 286 return to_datetime(obj, relative_base=relative_base).isoformat(sep=" ")
Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string.
289def is_date(obj: TimeLike) -> bool: 290 """Checks if a TimeLike object should be treated like a date.""" 291 if isinstance(obj, date) and not isinstance(obj, datetime): 292 return True 293 294 try: 295 time.strptime(str(obj).replace("-", ""), DATE_INT_FMT) 296 return True 297 except ValueError: 298 return False
Checks if a TimeLike object should be treated like a date.
301def make_inclusive( 302 start: TimeLike, end: TimeLike, dialect: t.Optional[DialectType] = "" 303) -> DatetimeRange: 304 """Adjust start and end times to to become inclusive datetimes. 305 306 SQLMesh treats start and end times as inclusive so that filters can be written as 307 308 SELECT * FROM x WHERE ds BETWEEN @start_ds AND @end_ds. 309 SELECT * FROM x WHERE ts BETWEEN @start_ts AND @end_ts. 310 311 In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if 312 start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps 313 are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is 314 treated as an exclusive range and converted to inclusive by subtracting 1 microsecond. If the dialect is 315 T-SQL then 1 nanoseconds is subtracted to account for the increased precision. 316 317 Args: 318 start: Start timelike object. 319 end: End timelike object. 320 321 Example: 322 >>> make_inclusive("2020-01-01", "2020-01-01") 323 (datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2020, 1, 1, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc)) 324 325 Returns: 326 A tuple of inclusive datetime objects. 327 """ 328 return (to_datetime(start), make_inclusive_end(end, dialect=dialect))
Adjust start and end times to to become inclusive datetimes.
SQLMesh treats start and end times as inclusive so that filters can be written as
SELECT * FROM x WHERE ds BETWEEN @start_ds AND @end_ds. SELECT * FROM x WHERE ts BETWEEN @start_ts AND @end_ts.
In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is treated as an exclusive range and converted to inclusive by subtracting 1 microsecond. If the dialect is T-SQL then 1 nanoseconds is subtracted to account for the increased precision.
Arguments:
- start: Start timelike object.
- end: End timelike object.
Example:
>>> make_inclusive("2020-01-01", "2020-01-01") (datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime(2020, 1, 1, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc))
Returns:
A tuple of inclusive datetime objects.
331def make_inclusive_end(end: TimeLike, dialect: t.Optional[DialectType] = "") -> datetime: 332 import pandas as pd 333 334 exclusive_end = make_exclusive(end) 335 if dialect == "tsql": 336 return to_utc_timestamp(exclusive_end) - pd.Timedelta(1, unit="ns") 337 return exclusive_end - timedelta(microseconds=1)
380def is_categorical_relative_expression(expression: str) -> bool: 381 if expression.strip().lower() in DAY_SHORTCUT_EXPRESSIONS: 382 return True 383 grain_kwargs = freshness_date_parser.get_kwargs(expression) 384 if not grain_kwargs: 385 return False 386 return not any(k in TIME_UNITS for k in grain_kwargs)
389def is_relative(value: TimeLike) -> bool: 390 """ 391 Tests a TimeLike object to see if it is a relative expression, eg '1 week ago' as opposed to an absolute timestamp 392 """ 393 if isinstance(value, str): 394 return is_categorical_relative_expression(value) 395 396 return False
Tests a TimeLike object to see if it is a relative expression, eg '1 week ago' as opposed to an absolute timestamp
399def to_time_column( 400 time_column: t.Union[TimeLike, exp.Null], 401 time_column_type: exp.DataType, 402 dialect: str, 403 time_column_format: t.Optional[str] = None, 404 nullable: bool = False, 405) -> exp.Expr: 406 """Convert a TimeLike object to the same time format and type as the model's time column.""" 407 if dialect == "clickhouse" and time_column_type.is_type( 408 *(exp.DataType.TEMPORAL_TYPES - {exp.DataType.Type.DATE, exp.DataType.Type.DATE32}) 409 ): 410 if time_column_type.is_type(exp.DataType.Type.DATETIME64): 411 if nullable: 412 time_column_type.set("nullable", nullable) 413 else: 414 # Clickhouse will error if we pass fractional seconds to DateTime, so we always 415 # use DateTime64 for timestamps. 416 # 417 # `datetime` objects have microsecond precision, so we specify the type precision as 6. 418 # If a timezone is present in the passed type object, it is included in the DateTime64 type 419 # via the `expressions` arg. 420 time_column_type = exp.DataType.build( 421 exp.DataType.Type.DATETIME64, 422 expressions=[ 423 exp.DataTypeParam(this=exp.Literal(this=6, is_string=False)), 424 *time_column_type.expressions, 425 ], 426 nullable=nullable or time_column_type.args.get("nullable", False), 427 ) 428 429 if isinstance(time_column, exp.Null): 430 return exp.cast(time_column, to=time_column_type) 431 if time_column_type.is_type(exp.DataType.Type.DATE, exp.DataType.Type.DATE32): 432 return exp.cast(exp.Literal.string(to_ds(time_column)), to="date") 433 if time_column_type.is_type(*TEMPORAL_TZ_TYPES): 434 return exp.cast(exp.Literal.string(to_tstz(time_column)), to=time_column_type) 435 if time_column_type.is_type(*exp.DataType.TEMPORAL_TYPES): 436 return exp.cast(exp.Literal.string(to_ts(time_column)), to=time_column_type) 437 438 if time_column_format: 439 time_column = to_datetime(time_column).strftime(time_column_format) 440 if time_column_type.is_type(*exp.DataType.TEXT_TYPES): 441 return exp.Literal.string(time_column) 442 if time_column_type.is_type(*exp.DataType.NUMERIC_TYPES): 443 return exp.Literal.number(time_column) 444 return exp.convert(time_column)
Convert a TimeLike object to the same time format and type as the model's time column.
447def pandas_timestamp_to_pydatetime( 448 df: pd.DataFrame, columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None 449) -> pd.DataFrame: 450 import pandas as pd 451 from pandas.api.types import is_datetime64_any_dtype # type: ignore 452 453 for column in df.columns: 454 if is_datetime64_any_dtype(df.dtypes[column]): 455 # We must use `pd.Series` and dtype or pandas will convert it back to pd.Timestamp during assignment 456 # https://stackoverflow.com/a/68961834/1707525 457 df[column] = pd.Series(df[column].dt.to_pydatetime(), dtype="object") 458 459 if columns_to_types and columns_to_types[column].this in ( 460 exp.DataType.Type.DATE, 461 exp.DataType.Type.DATE32, 462 ): 463 # Sometimes `to_pydatetime()` has already converted to date, so we only extract from datetime objects. 464 df[column] = df[column].map( 465 lambda x: x.date() if type(x) is datetime and not pd.isna(x) else x 466 ) 467 468 return df
471def format_tz_datetime( 472 time: TimeLike, 473 format_string: t.Optional[str] = "%Y-%m-%d %I:%M%p %Z", 474 use_local_timezone: bool = False, 475) -> str: 476 output_datetime = to_datetime(time) 477 if use_local_timezone: 478 local_timezone = datetime.now().astimezone().tzinfo 479 output_datetime = output_datetime.astimezone(local_timezone) 480 return ( 481 output_datetime.strftime(format_string) 482 if format_string 483 else output_datetime.isoformat(sep=" ") 484 )