Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions bigframes/core/compile/ibis_compiler/scalar_op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):

@scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True)
def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp):
if x.type() == ibis_dtypes.str:
if x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you verify if the SQLGlot compiler requires similar handling? If it does, please enable bpd.options.experiments.sql_compiler = "experimental" and run the doc tests below. Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch!! I forgot to update the SQLGlot compiler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you verify if the SQLGlot compiler requires similar handling? If it does, please enable bpd.options.experiments.sql_compiler = "experimental" and run the doc tests below. Thanks!

Updated the SQLGlot compiler and added tests. Also tested the code manually.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

return x.try_cast(ibis_dtypes.Timestamp(None)) # type: ignore
else:
# Numerical inputs.
Expand All @@ -1001,6 +1001,9 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp):
if op.format
else timestamp(x)
)
elif x.type() == ibis_dtypes.Timestamp(None): # type: ignore

return timestamp(x)
else:
# Numerical inputs.
if op.format:
Expand Down Expand Up @@ -2016,8 +2019,8 @@ def _ibis_num(number: float):


@ibis_udf.scalar.builtin
def timestamp(a: str) -> ibis_dtypes.timestamp: # type: ignore
"""Convert string to timestamp."""
def timestamp(a) -> ibis_dtypes.timestamp: # type: ignore
"""Convert string or a datetime to timestamp."""


@ibis_udf.scalar.builtin
Expand Down
4 changes: 2 additions & 2 deletions bigframes/core/compile/sqlglot/expressions/datetime_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression:
)
return sge.Cast(this=result, to="DATETIME")

if expr.dtype == dtypes.STRING_DTYPE:
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.TIMESTAMP_DTYPE):
return sge.TryCast(this=expr.expr, to="DATETIME")

value = expr.expr
Expand All @@ -396,7 +396,7 @@ def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression:
"PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC")
)

if expr.dtype == dtypes.STRING_DTYPE:
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.DATETIME_DTYPE):
return sge.func("TIMESTAMP", expr.expr)

value = expr.expr
Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/datetime_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
dtypes.INT_DTYPE,
dtypes.STRING_DTYPE,
dtypes.DATE_DTYPE,
dtypes.TIMESTAMP_DTYPE,
):
raise TypeError("expected string or numeric input")
return pd.ArrowDtype(pa.timestamp("us", tz=None))
Expand All @@ -91,6 +92,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
dtypes.INT_DTYPE,
dtypes.STRING_DTYPE,
dtypes.DATE_DTYPE,
dtypes.DATETIME_DTYPE,
):
raise TypeError("expected string or numeric input")
return pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
Expand Down
17 changes: 16 additions & 1 deletion bigframes/operations/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import annotations

import datetime as dt
from typing import Optional
from typing import Literal, Optional

import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike
import bigframes_vendored.pandas.core.indexes.accessor as vendordt
Expand Down Expand Up @@ -147,6 +147,21 @@ def tz(self) -> Optional[dt.timezone]:
else:
raise ValueError(f"Unexpected timezone {tz_string}")

def tz_localize(self, tz: Literal["UTC"] | None) -> series.Series:
    """Localize a tz-naive series to UTC, or drop the timezone with tz=None."""
    data = self._data

    if tz == "UTC":
        # Localizing data that already carries a timezone is an error.
        if data.dtype == dtypes.TIMESTAMP_DTYPE:
            raise ValueError("Already tz-aware.")
        return data._apply_unary_op(ops.ToTimestampOp())
    elif tz is None:
        # Removing the timezone from already tz-naive data is a no-op.
        if data.dtype == dtypes.DATETIME_DTYPE:
            return data
        return data._apply_unary_op(ops.ToDatetimeOp())

    # Any timezone other than "UTC"/None is unsupported.
    raise ValueError(f"Unsupported timezone {tz}")

@property
def unit(self) -> str:
# Assumption: pyarrow dtype
Expand Down
36 changes: 36 additions & 0 deletions tests/system/small/operations/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,42 @@ def test_dt_tz(scalars_dfs, col_name):
assert bf_result == pd_result


@pytest.mark.parametrize(
    ("col_name", "tz"),
    [
        ("datetime_col", None),
        ("timestamp_col", None),
        ("datetime_col", "UTC"),
    ],
)
def test_dt_tz_localize(scalars_dfs, col_name, tz):
    """tz_localize matches pandas for the supported column/timezone pairs."""
    pytest.importorskip("pandas", minversion="2.0.0")
    bf_df, pd_df = scalars_dfs

    bf_result = bf_df[col_name].dt.tz_localize(tz).to_pandas()
    pd_result = pd_df[col_name].dt.tz_localize(tz)

    testing.assert_series_equal(bf_result, pd_result, check_index_type=False)


@pytest.mark.parametrize(
    ("col_name", "tz"),
    [
        ("timestamp_col", "UTC"),
        ("datetime_col", "US/Eastern"),
    ],
)
def test_dt_tz_localize_invalid_inputs(scalars_dfs, col_name, tz):
    """tz_localize rejects already tz-aware input and unsupported timezones."""
    pytest.importorskip("pandas", minversion="2.0.0")
    bf_df = scalars_dfs[0]

    with pytest.raises(ValueError):
        bf_df[col_name].dt.tz_localize(tz)


@pytest.mark.parametrize(
("col_name",),
DATETIME_COL_NAMES,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
SELECT
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `int64_col`,
SAFE_CAST(`string_col` AS DATETIME),
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`,
SAFE_CAST(`timestamp_col` AS DATETIME)
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ SELECT
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000000) AS INT64)) AS TIMESTAMP) AS `int64_col_s`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `int64_col_ms`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `int64_col_us`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`,
TIMESTAMP(`datetime_col`) AS `datetime_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot):


def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
col_names = ["int64_col", "string_col", "float64_col"]
col_names = ["int64_col", "string_col", "float64_col", "timestamp_col"]
bf_df = scalar_types_df[col_names]
ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names}

Expand All @@ -189,14 +189,15 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):


def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col"]]
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col", "datetime_col"]]
ops_map = {
"int64_col": ops.ToTimestampOp().as_expr("int64_col"),
"float64_col": ops.ToTimestampOp().as_expr("float64_col"),
"int64_col_s": ops.ToTimestampOp(unit="s").as_expr("int64_col"),
"int64_col_ms": ops.ToTimestampOp(unit="ms").as_expr("int64_col"),
"int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"),
"int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"),
"datetime_col": ops.ToTimestampOp().as_expr("datetime_col"),
}

sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
Expand Down
30 changes: 30 additions & 0 deletions third_party/bigframes_vendored/pandas/core/indexes/accessor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Literal

from bigframes import constants


Expand Down Expand Up @@ -499,6 +501,34 @@ def tz(self):

raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def tz_localize(self, tz: Literal["UTC"] | None):
    """Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.

    This method takes a time zone (tz) naive Datetime Array/Index object and makes
    this time zone aware. It does not move the time to another time zone. Only "UTC"
    timezone is supported.

    This method can also be used to do the inverse - to create a time zone unaware
    object from an aware object. To that end, pass tz=None.

    **Examples:**

        >>> import pandas as pd
        >>> import bigframes.pandas as bpd
        >>> s = bpd.Series([pd.Timestamp(year=2026, month=1, day=1)])
        >>> s
        0 2026-01-01 00:00:00
        dtype: timestamp[us][pyarrow]
        >>> s.dt.tz_localize('UTC')
        0 2026-01-01 00:00:00+00:00
        dtype: timestamp[us, tz=UTC][pyarrow]

    Args:
        tz (str or None):
            The timezone to localize to. Only "UTC" is supported. Pass
            tz=None to remove the timezone from a tz-aware object.

    Returns:
        A BigFrames series with the updated timezone.
    """
    # NOTE: this is an abstract stub; concrete behavior lives in
    # bigframes.operations.datetimes. It must NOT be a @property — it takes
    # a required argument and is invoked as s.dt.tz_localize(tz).
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def unit(self) -> str:
"""Returns the unit of time precision.
Expand Down
Loading