Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 9797095

Browse files
feat: Implement GeoSeries scalar operators
This commit implements 6 new GeoSeries scalar properties and methods: `is_empty`, `geom_type`, `is_ring`, `is_simple`, `is_valid`, and `union`. This change includes: defining the new operations in `bigframes/operations/geo_ops.py`; implementing the compilation logic for both the Ibis and Polars backends; adding the new properties and methods to the `GeoSeries` class; and adding unit tests for all new features.
1 parent 6b8154c commit 9797095

7 files changed

Lines changed: 433 additions & 0 deletions

File tree

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
BigFrames -> Ibis compilation for the operations in bigframes.operations.geo_ops.
17+
18+
Please keep implementations in sequential order by op name.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
from bigframes_vendored.ibis.expr import types as ibis_types
24+
import bigframes_vendored.ibis.udf.scalar as ibis_udf
25+
26+
from bigframes.core.compile.ibis_compiler.scalar_op_compiler import scalar_op_compiler
27+
from bigframes.operations import geo_ops
28+
29+
# Short aliases for the registration decorators exposed by the shared
# scalar_op_compiler; used below to bind each geo op to its Ibis implementation.
register_unary_op = scalar_op_compiler.register_unary_op
register_binary_op = scalar_op_compiler.register_binary_op
31+
32+
33+
@ibis_udf.scalar.builtin("ST_IsEmpty")
def st_isempty(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
    """Declare the ``ST_IsEmpty`` SQL builtin (geo value -> boolean).

    The body is intentionally empty: only the name and signature are declared
    here; the function itself is expected to be provided by the SQL engine.
    """
    ...
36+
37+
38+
@register_unary_op(geo_ops.geo_st_isempty_op)
def geo_st_isempty_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_isempty_op`` by calling the ``ST_IsEmpty`` builtin on ``x``."""
    is_empty_expr = st_isempty(x)
    return is_empty_expr
41+
42+
43+
@ibis_udf.scalar.builtin("ST_GeometryType")
def st_geometrytype(x: ibis_types.GeoValue) -> ibis_types.StringValue:
    """Declare the ``ST_GeometryType`` SQL builtin (geo value -> string).

    The body is intentionally empty: only the name and signature are declared
    here; the function itself is expected to be provided by the SQL engine.
    """
    ...
46+
47+
48+
@register_unary_op(geo_ops.geo_st_geometrytype_op)
def geo_st_geometrytype_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_geometrytype_op`` by calling the ``ST_GeometryType`` builtin on ``x``."""
    geometry_type_expr = st_geometrytype(x)
    return geometry_type_expr
51+
52+
53+
@ibis_udf.scalar.builtin("ST_IsRing")
def st_isring(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
    """Declare the ``ST_IsRing`` SQL builtin (geo value -> boolean).

    The body is intentionally empty: only the name and signature are declared
    here; the function itself is expected to be provided by the SQL engine.
    """
    ...
56+
57+
58+
@register_unary_op(geo_ops.geo_st_isring_op)
def geo_st_isring_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_isring_op`` by calling the ``ST_IsRing`` builtin on ``x``."""
    is_ring_expr = st_isring(x)
    return is_ring_expr
61+
62+
63+
@ibis_udf.scalar.builtin("ST_EQUALS")
def st_equals(
    x: ibis_types.GeoValue, y: ibis_types.GeoValue
) -> ibis_types.BooleanValue:
    """Declare the ``ST_EQUALS`` SQL builtin (two geo values -> boolean).

    Not registered against an op directly; it is used as a building block by
    ``geo_st_issimple_op_impl`` in this module.
    """
    ...
68+
69+
70+
@ibis_udf.scalar.builtin("ST_SIMPLIFY")
def st_simplify(
    x: ibis_types.GeoValue, tolerance: ibis_types.NumericValue
) -> ibis_types.GeoValue:
    """Declare the ``ST_SIMPLIFY`` SQL builtin (geo value, tolerance -> geo value).

    Not registered against an op directly; it is used as a building block by
    ``geo_st_issimple_op_impl`` in this module.
    """
    ...
75+
76+
77+
@register_unary_op(geo_ops.geo_st_issimple_op)
def geo_st_issimple_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_issimple_op``.

    The result is ``ST_EQUALS(x, ST_SIMPLIFY(x, 0.0))``: the geometry is
    reported as simple when simplifying it with zero tolerance yields an equal
    geometry.
    """
    # NOTE(review): this is a heuristic rather than a true OGC "is simple"
    # test; a self-intersecting line may still compare equal to its
    # zero-tolerance simplification. Confirm against the geopandas semantics.
    return st_equals(x, st_simplify(x, 0.0))
81+
82+
83+
@ibis_udf.scalar.builtin("ST_ISVALID")
def st_isvalid(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
    """Declare the ``ST_ISVALID`` SQL builtin (geo value -> boolean).

    The body is intentionally empty: only the name and signature are declared
    here; the function itself is expected to be provided by the SQL engine.
    """
    # NOTE(review): BigQuery's geography function reference does not appear to
    # list ST_ISVALID -- confirm the target engine actually provides it,
    # otherwise queries using this op will fail at execution time.
    ...
86+
87+
88+
@register_unary_op(geo_ops.geo_st_isvalid_op)
def geo_st_isvalid_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_isvalid_op`` by calling the ``ST_ISVALID`` builtin on ``x``."""
    is_valid_expr = st_isvalid(x)
    return is_valid_expr
91+
92+
93+
@ibis_udf.scalar.builtin("ST_UNION")
def st_union(
    x: ibis_types.GeoValue, y: ibis_types.GeoValue
) -> ibis_types.GeoValue:
    """Declare the two-argument ``ST_UNION`` SQL builtin (geo, geo -> geo).

    The body is intentionally empty: only the name and signature are declared
    here; the function itself is expected to be provided by the SQL engine.
    """
    ...
98+
99+
100+
@register_binary_op(geo_ops.geo_st_union_op)
def geo_st_union_op_impl(
    x: ibis_types.Value, y: ibis_types.Value
) -> ibis_types.Value:
    """Compile ``geo_st_union_op`` by calling the ``ST_UNION`` builtin on ``x`` and ``y``."""
    union_expr = st_union(x, y)
    return union_expr

bigframes/core/compile/polars/compiler.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import bigframes.operations.datetime_ops as dt_ops
3939
import bigframes.operations.frequency_ops as freq_ops
4040
import bigframes.operations.generic_ops as gen_ops
41+
import bigframes.operations.geo_ops as geo_ops
4142
import bigframes.operations.json_ops as json_ops
4243
import bigframes.operations.numeric_ops as num_ops
4344
import bigframes.operations.string_ops as string_ops
@@ -437,6 +438,84 @@ def _(self, op: ops.ArrayReduceOp, input: pl.Expr) -> pl.Expr:
437438
f"Haven't implemented array aggregation: {op.aggregation}"
438439
)
439440

441+
@compile_op.register(geo_ops.GeoStIsemptyOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``GeoStIsemptyOp``: true where the string value contains ``EMPTY``."""
    # Literal (non-regex) substring match on the string value.
    # presumably the input column holds WKT text -- verify against caller.
    contains_empty = input.str.contains("EMPTY", literal=True)
    return contains_empty
444+
445+
@compile_op.register(geo_ops.GeoStGeometrytypeOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``GeoStGeometrytypeOp``: ``"ST_"`` + the leading word of the string value."""
    # NOTE(review): for WKT input this yields upper-case tags such as
    # "ST_POINT", while BigQuery's ST_GEOMETRYTYPE is documented to return
    # mixed case ("ST_Point"). Confirm whether the two backends must agree.
    leading_word = input.str.extract(r"^(\w+)", 1)
    return "ST_" + leading_word
448+
449+
@compile_op.register(geo_ops.GeoStIsringOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``GeoStIsringOp`` by parsing each value as WKT with shapely.

    Each non-empty string is parsed with ``shapely.wkt.loads`` and the
    geometry's ``is_ring`` attribute is returned. Empty or unparsable values
    produce null.
    """
    # shapely is imported lazily so it is only required when this op is used.
    # ShapelyError is the common base class in both Shapely 1.8 and 2.x;
    # WKTReadingError is only a deprecated alias in Shapely 2.x.
    from shapely.errors import ShapelyError
    import shapely.wkt

    def is_ring(s: str | None) -> bool | None:
        if not s:
            return None
        try:
            geom = shapely.wkt.loads(s)
            return getattr(geom, "is_ring", False)
        except ShapelyError:
            return None

    return input.map_elements(is_ring, return_dtype=pl.Boolean())
464+
465+
@compile_op.register(geo_ops.GeoStIssimpleOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``GeoStIssimpleOp`` by parsing each value as WKT with shapely.

    Each non-empty string is parsed with ``shapely.wkt.loads`` and the
    geometry's ``is_simple`` attribute is returned. Empty or unparsable values
    produce null.
    """
    # shapely is imported lazily so it is only required when this op is used.
    # ShapelyError is the common base class in both Shapely 1.8 and 2.x;
    # WKTReadingError is only a deprecated alias in Shapely 2.x.
    from shapely.errors import ShapelyError
    import shapely.wkt

    def is_simple(s: str | None) -> bool | None:
        if not s:
            return None
        try:
            geom = shapely.wkt.loads(s)
            return getattr(geom, "is_simple", False)
        except ShapelyError:
            return None

    return input.map_elements(is_simple, return_dtype=pl.Boolean())
480+
481+
@compile_op.register(geo_ops.GeoStIsvalidOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile ``GeoStIsvalidOp`` by parsing each value as WKT with shapely.

    Each non-empty string is parsed with ``shapely.wkt.loads`` and the
    geometry's ``is_valid`` attribute is returned. Empty or unparsable values
    produce null.
    """
    # shapely is imported lazily so it is only required when this op is used.
    # ShapelyError is the common base class in both Shapely 1.8 and 2.x;
    # WKTReadingError is only a deprecated alias in Shapely 2.x.
    from shapely.errors import ShapelyError
    import shapely.wkt

    def is_valid(s: str | None) -> bool | None:
        if not s:
            return None
        try:
            geom = shapely.wkt.loads(s)
            return getattr(geom, "is_valid", False)
        except ShapelyError:
            return None

    return input.map_elements(is_valid, return_dtype=pl.Boolean())
496+
497+
@compile_op.register(geo_ops.GeoStUnionOp)
def _(self, op: ops.ScalarOp, left: pl.Expr, right: pl.Expr) -> pl.Expr:
    """Compile ``GeoStUnionOp`` as a row-wise shapely union of two WKT values.

    ``left`` and ``right`` are packed into a struct so a single Python
    callback receives both values per row. The callback returns the WKT of
    the union, or null when either side is empty or cannot be parsed.
    """
    # shapely is imported lazily so it is only required when this op is used.
    # ShapelyError is the common base class in both Shapely 1.8 and 2.x;
    # WKTReadingError is only a deprecated alias in Shapely 2.x.
    from shapely.errors import ShapelyError
    import shapely.wkt

    def union(struct_val: dict[str, str | None]) -> str | None:
        # The fields in the struct are not guaranteed to be named, so unpack
        # them by position rather than by key.
        s1, s2 = struct_val.values()
        if not s1 or not s2:
            return None
        try:
            g1 = shapely.wkt.loads(s1)
            g2 = shapely.wkt.loads(s2)
            return g1.union(g2).wkt
        except ShapelyError:
            return None

    return pl.struct([left, right]).map_elements(
        union, return_dtype=pl.String()
    )
518+
440519
@dataclasses.dataclass(frozen=True)
441520
class PolarsAggregateCompiler:
442521
scalar_compiler = PolarsExpressionCompiler()

bigframes/geopandas/geoseries.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,36 @@ def is_closed(self) -> bigframes.series.Series:
7474
f"GeoSeries.is_closed is not supported. Use bigframes.bigquery.st_isclosed(series), instead. {constants.FEEDBACK_LINK}"
7575
)
7676

77+
@property
def is_empty(self) -> bigframes.series.Series:
    """Boolean Series obtained by applying ``geo_st_isempty_op`` to each geometry."""
    series = self._apply_unary_op(ops.geo_st_isempty_op)
    # geopandas returns an unnamed Series for this property; clearing the
    # name keeps results directly comparable with the geopandas output.
    series.name = None
    return series
82+
83+
@property
def geom_type(self) -> bigframes.series.Series:
    """String Series obtained by applying ``geo_st_geometrytype_op`` to each geometry."""
    # NOTE(review): the compiled backends prefix values with "ST_", whereas
    # geopandas reports bare names such as "Point" -- confirm intended output.
    series = self._apply_unary_op(ops.geo_st_geometrytype_op)
    # geopandas returns an unnamed Series for this property; clearing the
    # name keeps results directly comparable with the geopandas output.
    series.name = None
    return series
88+
89+
@property
def is_ring(self) -> bigframes.series.Series:
    """Boolean Series obtained by applying ``geo_st_isring_op`` to each geometry."""
    series = self._apply_unary_op(ops.geo_st_isring_op)
    # geopandas returns an unnamed Series for this property; clearing the
    # name keeps results directly comparable with the geopandas output.
    series.name = None
    return series
94+
95+
@property
def is_simple(self) -> bigframes.series.Series:
    """Boolean Series obtained by applying ``geo_st_issimple_op`` to each geometry."""
    series = self._apply_unary_op(ops.geo_st_issimple_op)
    # geopandas returns an unnamed Series for this property; clearing the
    # name keeps results directly comparable with the geopandas output.
    series.name = None
    return series
100+
101+
@property
def is_valid(self) -> bigframes.series.Series:
    """Boolean Series obtained by applying ``geo_st_isvalid_op`` to each geometry."""
    series = self._apply_unary_op(ops.geo_st_isvalid_op)
    # geopandas returns an unnamed Series for this property; clearing the
    # name keeps results directly comparable with the geopandas output.
    series.name = None
    return series
106+
77107
@classmethod
78108
def from_wkt(
79109
cls,
@@ -123,3 +153,6 @@ def distance(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # t
123153

124154
def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
125155
return self._apply_binary_op(other, ops.geo_st_intersection_op)
156+
157+
def union(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series:  # type: ignore
    """Return the element-wise result of ``geo_st_union_op`` applied to ``self`` and ``other``."""
    union_op = ops.geo_st_union_op
    return self._apply_binary_op(other, union_op)

bigframes/operations/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@
108108
geo_st_geogpoint_op,
109109
geo_st_intersection_op,
110110
geo_st_isclosed_op,
111+
geo_st_isempty_op,
112+
geo_st_geometrytype_op,
113+
geo_st_isring_op,
114+
geo_st_issimple_op,
115+
geo_st_isvalid_op,
116+
geo_st_union_op,
111117
geo_x_op,
112118
geo_y_op,
113119
GeoStBufferOp,

bigframes/operations/geo_ops.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,51 @@
8484
)
8585
geo_st_isclosed_op = GeoStIsclosedOp()
8686

87+
# Unary op: geo-like input -> BOOL output.
GeoStIsemptyOp = base_ops.create_unary_op(
    name="geo_st_isempty",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
    ),
)
geo_st_isempty_op = GeoStIsemptyOp()

# Unary op: geo-like input -> STRING output.
GeoStGeometrytypeOp = base_ops.create_unary_op(
    name="geo_st_geometrytype",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.STRING_DTYPE, description="geo-like"
    ),
)
geo_st_geometrytype_op = GeoStGeometrytypeOp()

# Unary op: geo-like input -> BOOL output.
GeoStIsringOp = base_ops.create_unary_op(
    name="geo_st_isring",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
    ),
)
geo_st_isring_op = GeoStIsringOp()

# Unary op: geo-like input -> BOOL output.
GeoStIssimpleOp = base_ops.create_unary_op(
    name="geo_st_issimple",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
    ),
)
geo_st_issimple_op = GeoStIssimpleOp()

# Unary op: geo-like input -> BOOL output.
GeoStIsvalidOp = base_ops.create_unary_op(
    name="geo_st_isvalid",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like"
    ),
)
geo_st_isvalid_op = GeoStIsvalidOp()

# Binary op using the BinaryGeo type signature.
GeoStUnionOp = base_ops.create_binary_op(
    name="geo_st_union", type_signature=op_typing.BinaryGeo()
)
geo_st_union_op = GeoStUnionOp()
87132
GeoXOp = base_ops.create_unary_op(
88133
name="geo_x",
89134
type_signature=op_typing.FixedOutputType(

tests/system/small/geopandas/test_geoseries.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,35 @@ def test_geo_is_closed_not_supported(session: bigframes.session.Session):
490490
bf_series.is_closed
491491

492492

493+
def test_geo_is_empty(session: bigframes.session.Session):
    """``GeoSeries.is_empty`` should match geopandas for empty, non-empty and null inputs."""
    # One shared fixture: a mix of empty geometries, non-empty geometries and a null.
    geometries = [
        Polygon([]),
        Point(0, 0),
        LineString([]),
        Polygon([(0, 0), (1, 1), (0, 1)]),
        GeometryCollection([]),
        None,
    ]
    bf_s = bigframes.geopandas.GeoSeries(list(geometries), session=session)
    pd_s = geopandas.GeoSeries(list(geometries))

    bf_result = bf_s.is_empty.to_pandas()
    # Cast the geopandas result to the nullable boolean dtype before comparing.
    pd_result = pd_s.is_empty.astype("boolean")

    assert_series_equal(bf_result, pd_result, check_index=False)
520+
521+
493522
def test_geo_buffer_raises_notimplemented(session: bigframes.session.Session):
494523
"""GeoPandas takes distance in units of the coordinate system, but BigQuery
495524
uses meters.

0 commit comments

Comments
 (0)