From 99389a12486907ccb6982b9a7225463b0d612ec7 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 19 Feb 2026 22:16:58 +0000 Subject: [PATCH 1/4] feat: add dt.tz_localize() --- .../ibis_compiler/scalar_op_registry.py | 10 +++--- bigframes/operations/datetime_ops.py | 2 ++ bigframes/operations/datetimes.py | 17 ++++++++- .../system/small/operations/test_datetimes.py | 36 +++++++++++++++++++ .../pandas/core/indexes/accessor.py | 30 ++++++++++++++++ 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 519b2c94426..73473f69442 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -16,7 +16,7 @@ import functools import typing -from typing import cast +from typing import cast, Union from bigframes_vendored import ibis import bigframes_vendored.ibis.expr.api as ibis_api @@ -978,7 +978,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp): @scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True) def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp): - if x.type() == ibis_dtypes.str: + if x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): return x.try_cast(ibis_dtypes.Timestamp(None)) # type: ignore else: # Numerical inputs. @@ -1001,6 +1001,8 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp): if op.format else timestamp(x) ) + elif x.type() == ibis_dtypes.Timestamp(None): # Datetime type + return timestamp(x) else: # Numerical inputs. 
if op.format: @@ -2016,8 +2018,8 @@ def _ibis_num(number: float): @ibis_udf.scalar.builtin -def timestamp(a: str) -> ibis_dtypes.timestamp: # type: ignore - """Convert string to timestamp.""" +def timestamp(a) -> ibis_dtypes.timestamp: # type: ignore + """Convert string or a datetime to timestamp.""" @ibis_udf.scalar.builtin diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py index 9988e8ed7b9..19541a383c8 100644 --- a/bigframes/operations/datetime_ops.py +++ b/bigframes/operations/datetime_ops.py @@ -73,6 +73,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT dtypes.INT_DTYPE, dtypes.STRING_DTYPE, dtypes.DATE_DTYPE, + dtypes.TIMESTAMP_DTYPE, ): raise TypeError("expected string or numeric input") return pd.ArrowDtype(pa.timestamp("us", tz=None)) @@ -91,6 +92,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT dtypes.INT_DTYPE, dtypes.STRING_DTYPE, dtypes.DATE_DTYPE, + dtypes.DATETIME_DTYPE, ): raise TypeError("expected string or numeric input") return pd.ArrowDtype(pa.timestamp("us", tz="UTC")) diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index 2eedb96b43e..f66c37bb645 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -15,7 +15,7 @@ from __future__ import annotations import datetime as dt -from typing import Optional +from typing import Literal, Optional import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike import bigframes_vendored.pandas.core.indexes.accessor as vendordt @@ -147,6 +147,21 @@ def tz(self) -> Optional[dt.timezone]: else: raise ValueError(f"Unexpected timezone {tz_string}") + def tz_localize(self, tz: Literal["UTC"] | None) -> series.Series: + if tz == "UTC": + if self._data.dtype == dtypes.TIMESTAMP_DTYPE: + raise ValueError("Already tz-aware.") + + return self._data._apply_unary_op(ops.ToTimestampOp()) + + if tz is None: + if 
self._data.dtype == dtypes.DATETIME_DTYPE: + return self._data # no-op + + return self._data._apply_unary_op(ops.ToDatetimeOp()) + + raise ValueError(f"Unsupported timezone {tz}") + @property def unit(self) -> str: # Assumption: pyarrow dtype diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 0e023189d56..ad632e1c2ca 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -324,6 +324,42 @@ def test_dt_tz(scalars_dfs, col_name): assert bf_result == pd_result +@pytest.mark.parametrize( + ("col_name", "tz"), + [ + ("datetime_col", None), + ("timestamp_col", None), + ("datetime_col", "UTC"), + ], +) +def test_dt_tz_localize(scalars_dfs, col_name, tz): + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + bf_series = scalars_df[col_name] + + bf_result = bf_series.dt.tz_localize(tz) + pd_result = scalars_pandas_df[col_name].dt.tz_localize(tz) + + testing.assert_series_equal( + bf_result.to_pandas(), pd_result, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("col_name", "tz"), + [ + ("timestamp_col", "UTC"), + ("datetime_col", "US/Eastern"), + ], +) +def test_dt_tz_localize_invalid_inputs(scalars_dfs, col_name, tz): + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, _ = scalars_dfs + + with pytest.raises(ValueError): + scalars_df[col_name].dt.tz_localize(tz) + + @pytest.mark.parametrize( ("col_name",), DATETIME_COL_NAMES, diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index a0388317be8..a3404c222d4 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -1,3 +1,5 @@ +from typing import Literal + from bigframes import constants @@ -499,6 +501,34 @@ def tz(self): raise 
NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property + def tz_localize(self, tz: Literal["UTC"] | None): + """Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. + + This method takes a time zone (tz) naive Datetime Array/Index object and makes + it time zone aware. It does not move the time to another time zone. Only "UTC" + timezone is supported. + + This method can also be used to do the inverse - to create a time zone unaware + object from an aware object. To that end, pass tz=None. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> s = bpd.Series([pd.Timestamp(year = 2026, month=1, day=1)]) + >>> s + 0 2026-01-01 00:00:00 + dtype: timestamp[us][pyarrow] + >>> s.dt.tz_localize('UTC') + 0 2026-01-01 00:00:00+00:00 + dtype: timestamp[us, tz=UTC][pyarrow] + + Returns: + A BigFrames series with the updated timezone. + """ + + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + @property def unit(self) -> str: """Returns the unit of time precision. 
From 4df5ccee5e262e37d9fbce671b9d7598cffbc293 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 19 Feb 2026 22:19:00 +0000 Subject: [PATCH 2/4] fix imports --- bigframes/core/compile/ibis_compiler/scalar_op_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 73473f69442..97cff2fd14a 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -16,7 +16,7 @@ import functools import typing -from typing import cast, Union +from typing import cast from bigframes_vendored import ibis import bigframes_vendored.ibis.expr.api as ibis_api From b82ed70d84fa07d7071863ca7bb9e161fbef51bf Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 19 Feb 2026 22:26:07 +0000 Subject: [PATCH 3/4] fix mypy --- bigframes/core/compile/ibis_compiler/scalar_op_registry.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 97cff2fd14a..3e1c24a78ed 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -978,7 +978,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp): @scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True) def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp): - if x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): + if x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): # type: ignore return x.try_cast(ibis_dtypes.Timestamp(None)) # type: ignore else: # Numerical inputs. 
@@ -1001,7 +1001,8 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp): if op.format else timestamp(x) ) - elif x.type() == ibis_dtypes.Timestamp(None): # Datetime type + elif x.type() == ibis_dtypes.Timestamp(None): # type: ignore + return timestamp(x) else: # Numerical inputs. From 3498c8062c9538a7865f97a1c6721df475f047f7 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 20 Feb 2026 01:02:29 +0000 Subject: [PATCH 4/4] update SQLGlot compiler too --- bigframes/core/compile/sqlglot/expressions/datetime_ops.py | 4 ++-- .../snapshots/test_datetime_ops/test_to_datetime/out.sql | 3 ++- .../snapshots/test_datetime_ops/test_to_timestamp/out.sql | 3 ++- .../core/compile/sqlglot/expressions/test_datetime_ops.py | 5 +++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py index 82f2f34edf3..a1c70262d55 100644 --- a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py @@ -371,7 +371,7 @@ def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression: ) return sge.Cast(this=result, to="DATETIME") - if expr.dtype == dtypes.STRING_DTYPE: + if expr.dtype in (dtypes.STRING_DTYPE, dtypes.TIMESTAMP_DTYPE): return sge.TryCast(this=expr.expr, to="DATETIME") value = expr.expr @@ -396,7 +396,7 @@ def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression: "PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC") ) - if expr.dtype == dtypes.STRING_DTYPE: + if expr.dtype in (dtypes.STRING_DTYPE, dtypes.DATETIME_DTYPE): return sge.func("TIMESTAMP", expr.expr) value = expr.expr diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql index 5cbfa3dbe77..3d0b8213b6e 100644 --- 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql @@ -1,5 +1,6 @@ SELECT CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `int64_col`, SAFE_CAST(`string_col` AS DATETIME), - CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col` + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`, + SAFE_CAST(`timestamp_col` AS DATETIME) FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql index eb829c05804..1e8910fad7c 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql @@ -4,5 +4,6 @@ SELECT CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000000) AS INT64)) AS TIMESTAMP) AS `int64_col_s`, CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `int64_col_ms`, CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `int64_col_us`, - CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns` + CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`, + TIMESTAMP(`datetime_col`) AS `datetime_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py index 95156748e96..76966d3c9bb 100644 --- 
a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -180,7 +180,7 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot): def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): - col_names = ["int64_col", "string_col", "float64_col"] + col_names = ["int64_col", "string_col", "float64_col", "timestamp_col"] bf_df = scalar_types_df[col_names] ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names} @@ -189,7 +189,7 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["int64_col", "string_col", "float64_col"]] + bf_df = scalar_types_df[["int64_col", "string_col", "float64_col", "datetime_col"]] ops_map = { "int64_col": ops.ToTimestampOp().as_expr("int64_col"), "float64_col": ops.ToTimestampOp().as_expr("float64_col"), @@ -197,6 +197,7 @@ def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): "int64_col_ms": ops.ToTimestampOp(unit="ms").as_expr("int64_col"), "int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"), "int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"), + "datetime_col": ops.ToTimestampOp().as_expr("datetime_col"), } sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))