From ecac160e7b5f187e39bc86c555cf6d216fc3d30e Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 22:14:16 +0000 Subject: [PATCH 1/9] feat: Implement ST_ISCLOSED geography function This commit implements the `ST_ISCLOSED` geography function. The following changes were made: - Added `GeoIsClosedOp` to `bigframes/operations/geo_ops.py`. - Added `st_isclosed` function to `bigframes/bigquery/_operations/geo.py`. - Added `is_closed` property to `GeoSeries` in `bigframes/geopandas/geoseries.py`. - Added system tests for the `is_closed` property. --- bigframes/bigquery/_operations/geo.py | 59 +++ bigframes/geopandas/geoseries.py | 20 ++ bigframes/operations/__init__.py | 2 + bigframes/operations/geo_ops.py | 5 + .../system/small/geopandas/test_geoseries.py | 339 +++++++++++------- 5 files changed, 288 insertions(+), 137 deletions(-) diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index fc9bd1a653..0c674f6b7a 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -380,3 +380,62 @@ def st_intersection( each aligned geometry with other. """ return series._apply_binary_op(other, ops.geo_st_intersection_op) + + +def st_isclosed( + series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries], +) -> bigframes.series.Series: + """ + Returns TRUE for a non-empty Geography, where each element in the + Geography has an empty boundary. + + .. note:: + BigQuery's Geography functions, like `st_isclosed`, interpret the geometry + data type as a point set on the Earth's surface. A point set is a set + of points, lines, and polygons on the WGS84 reference spheroid, with + geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data + + **Examples:** + + >>> import bigframes.geopandas + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> from shapely.geometry import Point, LineString, Polygon + >>> bpd.options.display.progress_bar = None + + >>> series = bigframes.geopandas.GeoSeries( + ... [ + ... Point(0, 0), # Point + ... LineString([(0, 0), (1, 1)]), # Open LineString + ... LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString + ... Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon + ... None, # geometry empty not supported by shapely use ST_GEOGFROMTEXT('GEOMETRYCOLLECTION EMPTY') instead + ... ] + ... ) + >>> series + 0 POINT (0 0) + 1 LINESTRING (0 0, 1 1) + 2 LINESTRING (0 0, 1 1, 0 1, 0 0) + 3 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 4 None + dtype: geometry + + >>> bbq.st_isclosed(series) + 0 True + 1 False + 2 True + 3 True + 4 None + dtype: boolean + + Args: + series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries): + A series containing geography objects. + + Returns: + bigframes.pandas.Series: + Series of booleans indicating whether each geometry is closed. + """ + series = series._apply_unary_op(ops.GeoIsClosedOp()) + series.name = None + return series diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 38ebda7d92..062411dd78 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -57,6 +57,26 @@ def boundary(self) -> bigframes.series.Series: # type: ignore series.name = None return series + @property + def is_closed(self) -> bigframes.series.Series: + """ + Checks if each geometry is closed. + + A point is closed. + A linestring is closed if its start and end points are the same. + A polygon is closed if it's a full polygon. + A collection is closed if and only if every element in the collection is closed. + An empty GEOGRAPHY isn't closed. + + Returns: + bigframes.series.Series: A Series of booleans. + """ + from bigframes.bigquery._operations import geo as geo_ops + + series = geo_ops.st_isclosed(self) + series.name = None + return series + @classmethod def from_wkt(cls, data, index=None) -> GeoSeries: series = bigframes.series.Series(data, index=index) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 3e97ec6f4a..f179b29a6a 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -98,6 +98,7 @@ geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_st_intersection_op, + GeoIsClosedOp, geo_x_op, geo_y_op, GeoStDistanceOp, @@ -385,6 +386,7 @@ "geo_st_geogfromtext_op", "geo_st_geogpoint_op", "geo_st_intersection_op", + "GeoIsClosedOp", "geo_x_op", "geo_y_op", "GeoStDistanceOp", diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index 98da9099cd..0faa6d8e1f 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -73,6 +73,11 @@ ) +@dataclasses.dataclass(frozen=True) +class GeoIsClosedOp(base_ops.UnaryOp): + name = "st_isclosed" + + @dataclasses.dataclass(frozen=True) class GeoStDistanceOp(base_ops.BinaryOp): name = "st_distance" diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index ae99fd6fc2..c66bce77b0 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -31,7 +31,6 @@ import bigframes.geopandas import bigframes.pandas import bigframes.series -from tests.system.utils import assert_series_equal @pytest.fixture(scope="session") @@ -41,40 +40,6 @@ def urban_areas_dfs(session, urban_areas_table_id): return (bf_ua, pd_ua) -def test_geo_x(urban_areas_dfs): - bf_ua, pd_ua = urban_areas_dfs - bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo - pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) - bf_result = bf_series.x.to_pandas() - pd_result = pd_series.x - - assert_series_equal( - pd_result.astype(pd.Float64Dtype()), - bf_result, - ) - - -def test_geo_x_non_point(urban_areas_dfs): - bf_ua, _ = urban_areas_dfs - bf_series: bigframes.geopandas.GeoSeries = bf_ua["urban_area_geom"].geo - - with pytest.raises(google.api_core.exceptions.BadRequest, match="ST_X"): - bf_series.x.to_pandas() - - -def test_geo_y(urban_areas_dfs): - bf_ua, pd_ua = urban_areas_dfs - bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo - pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) - bf_result = bf_series.y.to_pandas() - pd_result = pd_series.y - - assert_series_equal( - pd_result.astype(pd.Float64Dtype()), - bf_result, - ) - - def test_geo_area_not_supported(): s = bigframes.pandas.Series( [ @@ -96,108 +61,6 @@ def test_geo_area_not_supported(): bf_series.area -def test_geo_distance_not_supported(): - s1 = bigframes.pandas.Series( - [ - Polygon([(0, 0), (1, 1), (0, 1)]), - Polygon([(10, 0), (10, 5), (0, 0)]), - Polygon([(0, 0), (2, 2), (2, 0)]), - LineString([(0, 0), (1, 1), (0, 1)]), - Point(0, 1), - ], - dtype=GeometryDtype(), - ) - s2 = bigframes.geopandas.GeoSeries( - [ - Polygon([(0, 0), (1, 1), (0, 1)]), - Polygon([(10, 0), (10, 5), (0, 0)]), - Polygon([(0, 0), (2, 2), (2, 0)]), - LineString([(0, 0), (1, 1), (0, 1)]), - Point(0, 1), - ] - ) - with pytest.raises( - NotImplementedError, - match=re.escape("GeoSeries.distance is not supported."), - ): - s1.geo.distance(s2) - - -def test_geo_from_xy(): - x = [2.5, 5, -3.0] - y = [0.5, 1, 1.5] - bf_result = ( - bigframes.geopandas.GeoSeries.from_xy(x, y) - .astype(geopandas.array.GeometryDtype()) - .to_pandas() - ) - pd_result = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326").astype( - geopandas.array.GeometryDtype() - ) - - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_series_type=False, - check_index=False, - ) - - -def test_geo_from_wkt(): - wkts = [ - "Point(0 1)", - "Point(2 4)", - "Point(5 3)", - "Point(6 8)", - ] - - bf_result = bigframes.geopandas.GeoSeries.from_wkt(wkts).to_pandas() - - pd_result = geopandas.GeoSeries.from_wkt(wkts) - - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_series_type=False, - check_index=False, - ) - - -def test_geo_to_wkt(): - bf_geo = bigframes.geopandas.GeoSeries( - [ - Point(0, 1), - Point(2, 4), - Point(5, 3), - Point(6, 8), - ] - ) - - pd_geo = geopandas.GeoSeries( - [ - Point(0, 1), - Point(2, 4), - Point(5, 3), - Point(6, 8), - ] - ) - - # Test was failing before using str.replace because the pd_result had extra - # whitespace "POINT (0 1)" while bf_result had none "POINT(0 1)". - # str.replace replaces any encountered whitespaces with none. - bf_result = ( - bf_geo.to_wkt().astype("string[pyarrow]").to_pandas().str.replace(" ", "") - ) - - pd_result = pd_geo.to_wkt().astype("string[pyarrow]").str.replace(" ", "") - - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_index=False, - ) - - def test_geo_boundary(): bf_s = bigframes.pandas.Series( [ @@ -321,6 +184,33 @@ def test_geo_difference_with_similar_geometry_objects(): assert expected.iloc[2].equals(bf_result.iloc[2]) +def test_geo_distance_not_supported(): + s1 = bigframes.pandas.Series( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(10, 0), (10, 5), (0, 0)]), + Polygon([(0, 0), (2, 2), (2, 0)]), + LineString([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ], + dtype=GeometryDtype(), + ) + s2 = bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(10, 0), (10, 5), (0, 0)]), + Polygon([(0, 0), (2, 2), (2, 0)]), + LineString([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + ) + with pytest.raises( + NotImplementedError, + match=re.escape("GeoSeries.distance is not supported."), + ): + s1.geo.distance(s2) + + def test_geo_drop_duplicates(): bf_series = bigframes.geopandas.GeoSeries( [Point(1, 1), Point(2, 2), Point(3, 3), Point(2, 2)] @@ -338,6 +228,46 @@ def test_geo_drop_duplicates(): ) +def test_geo_from_wkt(): + wkts = [ + "Point(0 1)", + "Point(2 4)", + "Point(5 3)", + "Point(6 8)", + ] + + bf_result = bigframes.geopandas.GeoSeries.from_wkt(wkts).to_pandas() + + pd_result = geopandas.GeoSeries.from_wkt(wkts) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_series_type=False, + check_index=False, + ) + + +def test_geo_from_xy(): + x = [2.5, 5, -3.0] + y = [0.5, 1, 1.5] + bf_result = ( + bigframes.geopandas.GeoSeries.from_xy(x, y) + .astype(geopandas.array.GeometryDtype()) + .to_pandas() + ) + pd_result = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326").astype( + geopandas.array.GeometryDtype() + ) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_series_type=False, + check_index=False, + ) + + # the GeoSeries and GeoPandas results are not always the same. # For example, when the intersection between two polygons is empty, # GeoPandas returns 'POLYGON EMPTY' while GeoSeries returns 'GeometryCollection([])'. @@ -429,3 +359,138 @@ def test_geo_intersection_with_similar_geometry_objects(): assert expected.iloc[0].equals(bf_result.iloc[0]) assert expected.iloc[1].equals(bf_result.iloc[1]) assert expected.iloc[2].equals(bf_result.iloc[2]) + + +def test_geo_is_closed(): + bf_gs = bigframes.geopandas.GeoSeries( + [ + Point(0, 0), # Point + LineString([(0, 0), (1, 1)]), # Open LineString + LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon + GeometryCollection(), # Empty GeometryCollection + bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[0], # Also empty + None, # Should be filtered out by dropna + ], + index=[0, 1, 2, 3, 4, 5, 6], + ) + pd_gs = geopandas.GeoSeries( + [ + Point(0, 0), + LineString([(0, 0), (1, 1)]), + LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + GeometryCollection(), + geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[0], + None, + ], + index=pd.Index([0, 1, 2, 3, 4, 5, 6], dtype="Int64"), + crs="EPSG:4326", + ) + + bf_result = bf_gs.is_closed.to_pandas().dropna().astype(bool) + pd_result = pd_gs.is_closed.dropna().astype(bool) + + #Expected results based on ST_ISCLOSED documentation: + # Point: True + # Open LineString: False + # Closed LineString: True + # Polygon: True (assuming it's a full polygon, which it is) + # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) + # GEOMETRYCOLLECTION EMPTY: False + + expected_data = [True, False, True, True, False, False] + expected_index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") + expected_series = pd.Series(data=expected_data, index=expected_index, name="is_closed", dtype=bool) + + # First check BigQuery DataFrames result against expected + pd.testing.assert_series_equal( + bf_result, + expected_series, + check_dtype=False, # Pandas dtypes can be tricky with Nones involved before dropna + check_index_type=False, + check_series_type=False, + check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas + ) + + # Then check BigQuery DataFrames result against pandas/geopandas result + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_dtype=False, # Pandas dtypes can be tricky + check_index_type=False, + check_series_type=False, + check_names=False, # Name might differ + ) + + +def test_geo_to_wkt(): + bf_geo = bigframes.geopandas.GeoSeries( + [ + Point(0, 1), + Point(2, 4), + Point(5, 3), + Point(6, 8), + ] + ) + + pd_geo = geopandas.GeoSeries( + [ + Point(0, 1), + Point(2, 4), + Point(5, 3), + Point(6, 8), + ] + ) + + # Test was failing before using str.replace because the pd_result had extra + # whitespace "POINT (0 1)" while bf_result had none "POINT(0 1)". + # str.replace replaces any encountered whitespaces with none. + bf_result = ( + bf_geo.to_wkt().astype("string[pyarrow]").to_pandas().str.replace(" ", "") + ) + + pd_result = pd_geo.to_wkt().astype("string[pyarrow]").str.replace(" ", "") + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_index=False, + ) + + +def test_geo_x(urban_areas_dfs): + bf_ua, pd_ua = urban_areas_dfs + bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo + pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) + bf_result = bf_series.x.to_pandas() + pd_result = pd_series.x + + # We need to use the original assert_series_equal for this test as pd.testing.assert_series_equal + # does not support the pd_result.astype(pd.Float64Dtype()), bf_result, combination + from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case + assert_series_equal( + pd_result.astype(pd.Float64Dtype()), + bf_result, + ) + + +def test_geo_x_non_point(urban_areas_dfs): + bf_ua, _ = urban_areas_dfs + bf_series: bigframes.geopandas.GeoSeries = bf_ua["urban_area_geom"].geo + + with pytest.raises(google.api_core.exceptions.BadRequest, match="ST_X"): + bf_series.x.to_pandas() + + +def test_geo_y(urban_areas_dfs): + bf_ua, pd_ua = urban_areas_dfs + bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo + pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) + bf_result = bf_series.y.to_pandas() + pd_result = pd_series.y + from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case + assert_series_equal( + pd_result.astype(pd.Float64Dtype()), + bf_result, + ) From 1432425f819c4c729ac13adbb604f4551c40d15b Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 3 Jun 2025 22:18:14 +0000 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/operations/__init__.py | 2 +- .../system/small/geopandas/test_geoseries.py | 28 +++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index f179b29a6a..abe68c549f 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -98,9 +98,9 @@ geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_st_intersection_op, - GeoIsClosedOp, geo_x_op, geo_y_op, + GeoIsClosedOp, GeoStDistanceOp, ) from bigframes.operations.json_ops import ( diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index c66bce77b0..6bb3af202e 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -368,8 +368,10 @@ def test_geo_is_closed(): LineString([(0, 0), (1, 1)]), # Open LineString LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon - GeometryCollection(), # Empty GeometryCollection - bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[0], # Also empty + GeometryCollection(), # Empty GeometryCollection + bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[ + 0 + ], # Also empty None, # Should be filtered out by dropna ], index=[0, 1, 2, 3, 4, 5, 6], @@ -391,7 +393,7 @@ def test_geo_is_closed(): bf_result = bf_gs.is_closed.to_pandas().dropna().astype(bool) pd_result = pd_gs.is_closed.dropna().astype(bool) - #Expected results based on ST_ISCLOSED documentation: + # Expected results based on ST_ISCLOSED documentation: # Point: True # Open LineString: False # Closed LineString: True @@ -401,7 +403,9 @@ def test_geo_is_closed(): expected_data = [True, False, True, True, False, False] expected_index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") - expected_series = pd.Series(data=expected_data, index=expected_index, name="is_closed", dtype=bool) + expected_series = pd.Series( + data=expected_data, index=expected_index, name="is_closed", dtype=bool + ) # First check BigQuery DataFrames result against expected pd.testing.assert_series_equal( @@ -410,17 +414,17 @@ def test_geo_is_closed(): check_dtype=False, # Pandas dtypes can be tricky with Nones involved before dropna check_index_type=False, check_series_type=False, - check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas + check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas ) # Then check BigQuery DataFrames result against pandas/geopandas result pd.testing.assert_series_equal( bf_result, pd_result, - check_dtype=False, # Pandas dtypes can be tricky + check_dtype=False, # Pandas dtypes can be tricky check_index_type=False, check_series_type=False, - check_names=False, # Name might differ + check_names=False, # Name might differ ) @@ -468,7 +472,10 @@ def test_geo_x(urban_areas_dfs): # We need to use the original assert_series_equal for this test as pd.testing.assert_series_equal # does not support the pd_result.astype(pd.Float64Dtype()), bf_result, combination - from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case + from tests.system.utils import ( # noqa: F811 - Reimport for this specific case + assert_series_equal, + ) + assert_series_equal( pd_result.astype(pd.Float64Dtype()), bf_result, @@ -489,7 +496,10 @@ def test_geo_y(urban_areas_dfs): pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) bf_result = bf_series.y.to_pandas() pd_result = pd_series.y - from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case + from tests.system.utils import ( # noqa: F811 - Reimport for this specific case + assert_series_equal, + ) + assert_series_equal( pd_result.astype(pd.Float64Dtype()), bf_result, From c2284cbb04f3c5781c98e8bdce99c3f4d3e4be1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 4 Jun 2025 10:38:59 -0500 Subject: [PATCH 3/9] fix mypy failure --- bigframes/operations/geo_ops.py | 3 +++ .../system/small/geopandas/test_geoseries.py | 21 ++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index 0faa6d8e1f..ffada12d53 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -77,6 +77,9 @@ class GeoIsClosedOp(base_ops.UnaryOp): name = "st_isclosed" + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return dtypes.BOOL_DTYPE + @dataclasses.dataclass(frozen=True) class GeoStDistanceOp(base_ops.BinaryOp): diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index 6bb3af202e..0a39cbd48e 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -394,15 +394,15 @@ def test_geo_is_closed(): pd_result = pd_gs.is_closed.dropna().astype(bool) # Expected results based on ST_ISCLOSED documentation: - # Point: True - # Open LineString: False - # Closed LineString: True - # Polygon: True (assuming it's a full polygon, which it is) - # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) - # GEOMETRYCOLLECTION EMPTY: False - - expected_data = [True, False, True, True, False, False] - expected_index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") + expected_data = [ + True, # Point: True + False, # Open LineString: False + True, # Closed LineString: True + True, # Polygon: True (assuming it's a full polygon, which it is) + False, # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) + False, # GEOMETRYCOLLECTION EMPTY: False + ] + expected_index: pd.Index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") expected_series = pd.Series( data=expected_data, index=expected_index, name="is_closed", dtype=bool ) @@ -411,9 +411,6 @@ def test_geo_is_closed(): pd.testing.assert_series_equal( bf_result, expected_series, - check_dtype=False, # Pandas dtypes can be tricky with Nones involved before dropna - check_index_type=False, - check_series_type=False, check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas ) From a56f5c172d5bcf9fa7384d6df4229d3bedd85a2a Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 15:47:32 +0000 Subject: [PATCH 4/9] feat: Implement ST_ISCLOSED geography function This commit implements the `ST_ISCLOSED` geography function. The following changes were made: - Added `GeoIsClosedOp` to `bigframes/operations/geo_ops.py`. - Added `st_isclosed` function to `bigframes/bigquery/_operations/geo.py`. - Added `is_closed` property to `GeoSeries` in `bigframes/geopandas/geoseries.py`. - Registered `GeoIsClosedOp` in `bigframes/core/compile/scalar_op_compiler.py` by defining an Ibis UDF and registering the op. - Added system checks for the `is_closed` property. --- bigframes/core/compile/scalar_op_compiler.py | 10 ++++ bigframes/operations/__init__.py | 2 +- bigframes/operations/geo_ops.py | 3 -- .../system/small/geopandas/test_geoseries.py | 49 ++++++++----------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 7707f16dad..e0c3879fe2 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1074,6 +1074,11 @@ def geo_st_intersection_op_impl(x: ibis_types.Value, y: ibis_types.Value): ) +@scalar_op_compiler.register_unary_op(ops.GeoIsClosedOp, pass_op=False) +def geo_st_isclosed_op_impl(x: ibis_types.Value): + return st_isclosed(x) + + @scalar_op_compiler.register_unary_op(ops.geo_x_op) def geo_x_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.GeoSpatialValue, x).x() @@ -2180,6 +2185,11 @@ def str_lstrip_op( # type: ignore[empty-body] """Remove leading and trailing characters.""" +@ibis_udf.scalar.builtin +def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean: # type: ignore + """Checks if a geography is closed.""" + + @ibis_udf.scalar.builtin(name="rtrim") def str_rstrip_op( # type: ignore[empty-body] x: ibis_dtypes.String, to_strip: ibis_dtypes.String diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index abe68c549f..f179b29a6a 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -98,9 +98,9 @@ geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_st_intersection_op, + GeoIsClosedOp, geo_x_op, geo_y_op, - GeoIsClosedOp, GeoStDistanceOp, ) from bigframes.operations.json_ops import ( diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index ffada12d53..0faa6d8e1f 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -77,9 +77,6 @@ class GeoIsClosedOp(base_ops.UnaryOp): name = "st_isclosed" - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - return dtypes.BOOL_DTYPE - @dataclasses.dataclass(frozen=True) class GeoStDistanceOp(base_ops.BinaryOp): diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index 0a39cbd48e..c66bce77b0 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -368,10 +368,8 @@ def test_geo_is_closed(): LineString([(0, 0), (1, 1)]), # Open LineString LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon - GeometryCollection(), # Empty GeometryCollection - bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[ - 0 - ], # Also empty + GeometryCollection(), # Empty GeometryCollection + bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[0], # Also empty None, # Should be filtered out by dropna ], index=[0, 1, 2, 3, 4, 5, 6], @@ -393,35 +391,36 @@ def test_geo_is_closed(): bf_result = bf_gs.is_closed.to_pandas().dropna().astype(bool) pd_result = pd_gs.is_closed.dropna().astype(bool) - # Expected results based on ST_ISCLOSED documentation: - expected_data = [ - True, # Point: True - False, # Open LineString: False - True, # Closed LineString: True - True, # Polygon: True (assuming it's a full polygon, which it is) - False, # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) - False, # GEOMETRYCOLLECTION EMPTY: False - ] - expected_index: pd.Index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") - expected_series = pd.Series( - data=expected_data, index=expected_index, name="is_closed", dtype=bool - ) + #Expected results based on ST_ISCLOSED documentation: + # Point: True + # Open LineString: False + # Closed LineString: True + # Polygon: True (assuming it's a full polygon, which it is) + # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) + # GEOMETRYCOLLECTION EMPTY: False + + expected_data = [True, False, True, True, False, False] + expected_index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") + expected_series = pd.Series(data=expected_data, index=expected_index, name="is_closed", dtype=bool) # First check BigQuery DataFrames result against expected pd.testing.assert_series_equal( bf_result, expected_series, - check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas + check_dtype=False, # Pandas dtypes can be tricky with Nones involved before dropna + check_index_type=False, + check_series_type=False, + check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas ) # Then check BigQuery DataFrames result against pandas/geopandas result pd.testing.assert_series_equal( bf_result, pd_result, - check_dtype=False, # Pandas dtypes can be tricky + check_dtype=False, # Pandas dtypes can be tricky check_index_type=False, check_series_type=False, - check_names=False, # Name might differ + check_names=False, # Name might differ ) @@ -469,10 +468,7 @@ def test_geo_x(urban_areas_dfs): # We need to use the original assert_series_equal for this test as pd.testing.assert_series_equal # does not support the pd_result.astype(pd.Float64Dtype()), bf_result, combination - from tests.system.utils import ( # noqa: F811 - Reimport for this specific case - assert_series_equal, - ) - + from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case assert_series_equal( pd_result.astype(pd.Float64Dtype()), bf_result, @@ -493,10 +489,7 @@ def test_geo_y(urban_areas_dfs): pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) bf_result = bf_series.y.to_pandas() pd_result = pd_series.y - from tests.system.utils import ( # noqa: F811 - Reimport for this specific case - assert_series_equal, - ) - + from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case assert_series_equal( pd_result.astype(pd.Float64Dtype()), bf_result, From 8f1df466c4faed762d15c1a8330e2cfb13e48eb8 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 4 Jun 2025 15:50:02 +0000 Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/operations/__init__.py | 2 +- .../system/small/geopandas/test_geoseries.py | 28 +++++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index f179b29a6a..abe68c549f 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -98,9 +98,9 @@ geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_st_intersection_op, - GeoIsClosedOp, geo_x_op, geo_y_op, + GeoIsClosedOp, GeoStDistanceOp, ) from bigframes.operations.json_ops import ( diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index c66bce77b0..6bb3af202e 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -368,8 +368,10 @@ def test_geo_is_closed(): LineString([(0, 0), (1, 1)]), # Open LineString LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon - GeometryCollection(), # Empty GeometryCollection - bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[0], # Also empty + GeometryCollection(), # Empty GeometryCollection + bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[ + 0 + ], # Also empty None, # Should be filtered out by dropna ], index=[0, 1, 2, 3, 4, 5, 6], @@ -391,7 +393,7 @@ def test_geo_is_closed(): bf_result = bf_gs.is_closed.to_pandas().dropna().astype(bool) pd_result = pd_gs.is_closed.dropna().astype(bool) - #Expected results based on ST_ISCLOSED documentation: + # Expected results based on ST_ISCLOSED documentation: # Point: True # Open LineString: False # Closed LineString: True @@ -401,7 +403,9 @@ def test_geo_is_closed(): expected_data = [True, False, True, True, False, False] expected_index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") - expected_series = pd.Series(data=expected_data, index=expected_index, name="is_closed", dtype=bool) + expected_series = pd.Series( + data=expected_data, index=expected_index, name="is_closed", dtype=bool + ) # First check BigQuery DataFrames result against expected pd.testing.assert_series_equal( @@ -410,17 +414,17 @@ def test_geo_is_closed(): check_dtype=False, # Pandas dtypes can be tricky with Nones involved before dropna check_index_type=False, check_series_type=False, - check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas + check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas ) # Then check BigQuery DataFrames result against pandas/geopandas result pd.testing.assert_series_equal( bf_result, pd_result, - check_dtype=False, # Pandas dtypes can be tricky + check_dtype=False, # Pandas dtypes can be tricky check_index_type=False, check_series_type=False, - check_names=False, # Name might differ + check_names=False, # Name might differ ) @@ -468,7 +472,10 @@ def test_geo_x(urban_areas_dfs): # We need to use the original assert_series_equal for this test as pd.testing.assert_series_equal # does not support the pd_result.astype(pd.Float64Dtype()), bf_result, combination - from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case + from tests.system.utils import ( # noqa: F811 - Reimport for this specific case + assert_series_equal, + ) + assert_series_equal( pd_result.astype(pd.Float64Dtype()), bf_result, @@ -489,7 +496,10 @@ def test_geo_y(urban_areas_dfs): pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) bf_result = bf_series.y.to_pandas() pd_result = pd_series.y - from tests.system.utils import assert_series_equal # noqa: F811 - Reimport for this specific case + from tests.system.utils import ( # noqa: F811 - Reimport for this specific case + assert_series_equal, + ) + assert_series_equal( pd_result.astype(pd.Float64Dtype()), bf_result, From 4379a7bb871405d4085546cac53da029965c17a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 4 Jun 2025 11:39:17 -0500 Subject: [PATCH 6/9] wait to implement geoseries.is_closed for now --- bigframes/bigquery/__init__.py | 2 + bigframes/bigquery/_operations/geo.py | 2 +- bigframes/core/compile/scalar_op_compiler.py | 2 +- bigframes/geopandas/geoseries.py | 23 ++----- bigframes/operations/__init__.py | 4 +- bigframes/operations/geo_ops.py | 12 ++-- tests/system/small/bigquery/test_geo.py | 38 +++++++++++ .../system/small/geopandas/test_geoseries.py | 67 ------------------- .../bigframes_vendored/geopandas/geoseries.py | 22 ++++++ 9 files changed, 79 insertions(+), 93 deletions(-) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 301207bb31..b4c08d1953 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -32,6 +32,7 @@ st_difference, st_distance, st_intersection, + st_isclosed, ) from bigframes.bigquery._operations.json import ( json_extract, @@ -58,6 +59,7 @@ "st_difference", "st_distance", "st_intersection", + "st_isclosed", # json ops "json_extract", "json_extract_array", diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 0c674f6b7a..74d9d4bf4f 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -436,6 +436,6 @@ def st_isclosed( bigframes.pandas.Series: Series of booleans indicating whether each geometry is closed. """ - series = series._apply_unary_op(ops.GeoIsClosedOp()) + series = series._apply_unary_op(ops.geo_st_isclosed_op) series.name = None return series diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index e0c3879fe2..8bed0f1e01 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1074,7 +1074,7 @@ def geo_st_intersection_op_impl(x: ibis_types.Value, y: ibis_types.Value): ) -@scalar_op_compiler.register_unary_op(ops.GeoIsClosedOp, pass_op=False) +@scalar_op_compiler.register_unary_op(ops.geo_st_isclosed_op, pass_op=False) def geo_st_isclosed_op_impl(x: ibis_types.Value): return st_isclosed(x) diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 062411dd78..571ca3e930 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -59,23 +59,12 @@ def boundary(self) -> bigframes.series.Series: # type: ignore @property def is_closed(self) -> bigframes.series.Series: - """ - Checks if each geometry is closed. - - A point is closed. - A linestring is closed if its start and end points are the same. - A polygon is closed if it's a full polygon. - A collection is closed if and only if every element in the collection is closed. - An empty GEOGRAPHY isn't closed. - - Returns: - bigframes.series.Series: A Series of booleans. - """ - from bigframes.bigquery._operations import geo as geo_ops - - series = geo_ops.st_isclosed(self) - series.name = None - return series + # TODO(tswast): GeoPandas doesn't treat Point as closed. Use ST_LENGTH + # when available to filter out "closed" shapes that return false in + # GeoPandas. + raise NotImplementedError( + f"GeoSeries.is_closed is not supported. Use bigframes.bigquery.st_isclosed(series), instead. {constants.FEEDBACK_LINK}" + ) @classmethod def from_wkt(cls, data, index=None) -> GeoSeries: diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index abe68c549f..4c04b1e793 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -98,9 +98,9 @@ geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_st_intersection_op, + geo_st_isclosed_op, geo_x_op, geo_y_op, - GeoIsClosedOp, GeoStDistanceOp, ) from bigframes.operations.json_ops import ( @@ -386,7 +386,7 @@ "geo_st_geogfromtext_op", "geo_st_geogpoint_op", "geo_st_intersection_op", - "GeoIsClosedOp", + "geo_st_isclosed_op", "geo_x_op", "geo_y_op", "GeoStDistanceOp", diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index 0faa6d8e1f..170954b238 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -54,6 +54,13 @@ name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo() ) +geo_st_isclosed_op = base_ops.create_unary_op( + name="geo_st_isclosed", + type_signature=op_typing.FixedOutputType( + dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like" + ), +) + geo_x_op = base_ops.create_unary_op( name="geo_x", type_signature=op_typing.FixedOutputType( @@ -73,11 +80,6 @@ ) -@dataclasses.dataclass(frozen=True) -class GeoIsClosedOp(base_ops.UnaryOp): - name = "st_isclosed" - - @dataclasses.dataclass(frozen=True) class GeoStDistanceOp(base_ops.BinaryOp): name = "st_distance" diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index be517fb5cc..2aafb338f2 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -354,3 +354,41 @@ def test_geo_st_intersection_with_similar_geometry_objects(): check_exact=False, rtol=0.1, ) + + +def test_geo_st_isclosed(): + bf_gs = bigframes.geopandas.GeoSeries( + [ + Point(0, 0), # Point + LineString([(0, 0), (1, 1)]), # Open LineString + LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString + Polygon([(0, 0), (1, 1), (0, 1)]), # Open polygon + GeometryCollection(), # Empty GeometryCollection + bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[ + 0 + ], # Also empty + None, # Should be filtered out by dropna + ], + index=[0, 1, 2, 3, 4, 5, 6], + ) + bf_result = bbq.st_isclosed(bf_gs).to_pandas() + + # Expected results based on ST_ISCLOSED documentation: + expected_data = [ + True, # Point: True + False, # Open LineString: False + True, # Closed LineString: True + False, # Polygon: False (only True if it's a full polygon) + False, # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) + False, # GEOMETRYCOLLECTION EMPTY: False + None, + ] + expected_index: pd.Index = pd.Index([0, 1, 2, 3, 4, 5, 6], dtype="Int64") + expected_series = pd.Series( + data=expected_data, index=expected_index, dtype="boolean" + ) + + pd.testing.assert_series_equal( + bf_result, + expected_series, + ) diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index 6bb3af202e..1e2ecf8a1c 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -361,73 +361,6 @@ def test_geo_intersection_with_similar_geometry_objects(): assert expected.iloc[2].equals(bf_result.iloc[2]) -def test_geo_is_closed(): - bf_gs = bigframes.geopandas.GeoSeries( - [ - Point(0, 0), # Point - LineString([(0, 0), (1, 1)]), # Open LineString - LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString - Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon - GeometryCollection(), # Empty GeometryCollection - bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[ - 0 - ], # Also empty - None, # Should be filtered out by dropna - ], - index=[0, 1, 2, 3, 4, 5, 6], - ) - pd_gs = geopandas.GeoSeries( - [ - Point(0, 0), - LineString([(0, 0), (1, 1)]), - LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), - Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), - GeometryCollection(), - geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[0], - None, - ], - index=pd.Index([0, 1, 2, 3, 4, 5, 6], dtype="Int64"), - crs="EPSG:4326", - ) - - bf_result = bf_gs.is_closed.to_pandas().dropna().astype(bool) - pd_result = pd_gs.is_closed.dropna().astype(bool) - - # Expected results based on ST_ISCLOSED documentation: - # Point: True - # Open LineString: False - # Closed LineString: True - # Polygon: True (assuming it's a full polygon, which it is) - # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) - # GEOMETRYCOLLECTION EMPTY: False - - expected_data = [True, False, True, True, False, False] - expected_index = pd.Index([0, 1, 2, 3, 4, 5], dtype="Int64") - expected_series = pd.Series( - data=expected_data, index=expected_index, name="is_closed", dtype=bool - ) - - # First check BigQuery DataFrames result against expected - pd.testing.assert_series_equal( - bf_result, - expected_series, - check_dtype=False, # Pandas dtypes can be tricky with Nones involved before dropna - check_index_type=False, - check_series_type=False, - check_names=False, # Name might differ due to how it's set or not set in BQ vs Pandas - ) - - # Then check BigQuery DataFrames result against pandas/geopandas result - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_dtype=False, # Pandas dtypes can be tricky - check_index_type=False, - check_series_type=False, - check_names=False, # Name might differ - ) - - def test_geo_to_wkt(): bf_geo = bigframes.geopandas.GeoSeries( [ diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 613a929c04..92a58b3dc6 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -483,3 +483,25 @@ def intersection(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignor each aligned geometry with other. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def is_closed(self: GeoSeries) -> bigframes.series.Series: + """ + [Not Implemented] Use ``bigframes.bigquery.st_isclosed(series)`` + instead to return a boolean indicating if a shape is closed. + + In GeoPandas, this returns a Series of booleans with value True if a + LineString's or LinearRing's first and last points are equal. + + Returns False for any other geometry type. + + Returns: + bigframes.pandas.Series: + Series of booleans. + + Raises: + NotImplementedError: + GeoSeries.is_closed is not supported. Use + ``bigframes.bigquery.st_isclosed(series)``, instead. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 9848097924c94d5dd115c34b9cef1cfd2cb7e5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Wed, 4 Jun 2025 11:44:26 -0500 Subject: [PATCH 7/9] fix doctest --- bigframes/bigquery/_operations/geo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 74d9d4bf4f..4324302154 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -408,8 +408,8 @@ def st_isclosed( ... Point(0, 0), # Point ... LineString([(0, 0), (1, 1)]), # Open LineString ... LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString - ... Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Polygon - ... None, # geometry empty not supported by shapely use ST_GEOGFROMTEXT('GEOMETRYCOLLECTION EMPTY') instead + ... Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + ... None, ... ] ... ) >>> series @@ -424,8 +424,8 @@ def st_isclosed( 0 True 1 False 2 True - 3 True - 4 None + 3 False + 4 dtype: boolean Args: From 709ba1809bbf9befd19c37ef3a763445f9beab88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Fri, 6 Jun 2025 09:40:53 -0500 Subject: [PATCH 8/9] address review comments --- tests/system/small/bigquery/test_geo.py | 7 +- .../system/small/geopandas/test_geoseries.py | 282 +++++++++--------- 2 files changed, 140 insertions(+), 149 deletions(-) diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 2aafb338f2..3eb9b71495 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -383,12 +383,11 @@ def test_geo_st_isclosed(): False, # GEOMETRYCOLLECTION EMPTY: False None, ] - expected_index: pd.Index = pd.Index([0, 1, 2, 3, 4, 5, 6], dtype="Int64") - expected_series = pd.Series( - data=expected_data, index=expected_index, dtype="boolean" - ) + expected_series = pd.Series(data=expected_data, dtype="boolean") pd.testing.assert_series_equal( bf_result, expected_series, + # We default to Int64 (nullable) dtype, but pandas defaults to int64 index. + check_index_type=False, ) diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index 1e2ecf8a1c..ae99fd6fc2 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -31,6 +31,7 @@ import bigframes.geopandas import bigframes.pandas import bigframes.series +from tests.system.utils import assert_series_equal @pytest.fixture(scope="session") @@ -40,6 +41,40 @@ def urban_areas_dfs(session, urban_areas_table_id): return (bf_ua, pd_ua) +def test_geo_x(urban_areas_dfs): + bf_ua, pd_ua = urban_areas_dfs + bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo + pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) + bf_result = bf_series.x.to_pandas() + pd_result = pd_series.x + + assert_series_equal( + pd_result.astype(pd.Float64Dtype()), + bf_result, + ) + + +def test_geo_x_non_point(urban_areas_dfs): + bf_ua, _ = urban_areas_dfs + bf_series: bigframes.geopandas.GeoSeries = bf_ua["urban_area_geom"].geo + + with pytest.raises(google.api_core.exceptions.BadRequest, match="ST_X"): + bf_series.x.to_pandas() + + +def test_geo_y(urban_areas_dfs): + bf_ua, pd_ua = urban_areas_dfs + bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo + pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) + bf_result = bf_series.y.to_pandas() + pd_result = pd_series.y + + assert_series_equal( + pd_result.astype(pd.Float64Dtype()), + bf_result, + ) + + def test_geo_area_not_supported(): s = bigframes.pandas.Series( [ @@ -61,6 +96,108 @@ def test_geo_area_not_supported(): bf_series.area +def test_geo_distance_not_supported(): + s1 = bigframes.pandas.Series( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(10, 0), (10, 5), (0, 0)]), + Polygon([(0, 0), (2, 2), (2, 0)]), + LineString([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ], + dtype=GeometryDtype(), + ) + s2 = bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(10, 0), (10, 5), (0, 0)]), + Polygon([(0, 0), (2, 2), (2, 0)]), + LineString([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + ) + with pytest.raises( + NotImplementedError, + match=re.escape("GeoSeries.distance is not supported."), + ): + s1.geo.distance(s2) + + +def test_geo_from_xy(): + x = [2.5, 5, -3.0] + y = [0.5, 1, 1.5] + bf_result = ( + bigframes.geopandas.GeoSeries.from_xy(x, y) + .astype(geopandas.array.GeometryDtype()) + .to_pandas() + ) + pd_result = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326").astype( + geopandas.array.GeometryDtype() + ) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_series_type=False, + check_index=False, + ) + + +def test_geo_from_wkt(): + wkts = [ + "Point(0 1)", + "Point(2 4)", + "Point(5 3)", + "Point(6 8)", + ] + + bf_result = bigframes.geopandas.GeoSeries.from_wkt(wkts).to_pandas() + + pd_result = geopandas.GeoSeries.from_wkt(wkts) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_series_type=False, + check_index=False, + ) + + +def test_geo_to_wkt(): + bf_geo = bigframes.geopandas.GeoSeries( + [ + Point(0, 1), + Point(2, 4), + Point(5, 3), + Point(6, 8), + ] + ) + + pd_geo = geopandas.GeoSeries( + [ + Point(0, 1), + Point(2, 4), + Point(5, 3), + Point(6, 8), + ] + ) + + # Test was failing before using str.replace because the pd_result had extra + # whitespace "POINT (0 1)" while bf_result had none "POINT(0 1)". + # str.replace replaces any encountered whitespaces with none. + bf_result = ( + bf_geo.to_wkt().astype("string[pyarrow]").to_pandas().str.replace(" ", "") + ) + + pd_result = pd_geo.to_wkt().astype("string[pyarrow]").str.replace(" ", "") + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_index=False, + ) + + def test_geo_boundary(): bf_s = bigframes.pandas.Series( [ @@ -184,33 +321,6 @@ def test_geo_difference_with_similar_geometry_objects(): assert expected.iloc[2].equals(bf_result.iloc[2]) -def test_geo_distance_not_supported(): - s1 = bigframes.pandas.Series( - [ - Polygon([(0, 0), (1, 1), (0, 1)]), - Polygon([(10, 0), (10, 5), (0, 0)]), - Polygon([(0, 0), (2, 2), (2, 0)]), - LineString([(0, 0), (1, 1), (0, 1)]), - Point(0, 1), - ], - dtype=GeometryDtype(), - ) - s2 = bigframes.geopandas.GeoSeries( - [ - Polygon([(0, 0), (1, 1), (0, 1)]), - Polygon([(10, 0), (10, 5), (0, 0)]), - Polygon([(0, 0), (2, 2), (2, 0)]), - LineString([(0, 0), (1, 1), (0, 1)]), - Point(0, 1), - ] - ) - with pytest.raises( - NotImplementedError, - match=re.escape("GeoSeries.distance is not supported."), - ): - s1.geo.distance(s2) - - def test_geo_drop_duplicates(): bf_series = bigframes.geopandas.GeoSeries( [Point(1, 1), Point(2, 2), Point(3, 3), Point(2, 2)] @@ -228,46 +338,6 @@ def test_geo_drop_duplicates(): ) -def test_geo_from_wkt(): - wkts = [ - "Point(0 1)", - "Point(2 4)", - "Point(5 3)", - "Point(6 8)", - ] - - bf_result = bigframes.geopandas.GeoSeries.from_wkt(wkts).to_pandas() - - pd_result = geopandas.GeoSeries.from_wkt(wkts) - - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_series_type=False, - check_index=False, - ) - - -def test_geo_from_xy(): - x = [2.5, 5, -3.0] - y = [0.5, 1, 1.5] - bf_result = ( - bigframes.geopandas.GeoSeries.from_xy(x, y) - .astype(geopandas.array.GeometryDtype()) - .to_pandas() - ) - pd_result = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326").astype( - geopandas.array.GeometryDtype() - ) - - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_series_type=False, - check_index=False, - ) - - # the GeoSeries and GeoPandas results are not always the same. # For example, when the intersection between two polygons is empty, # GeoPandas returns 'POLYGON EMPTY' while GeoSeries returns 'GeometryCollection([])'. @@ -359,81 +429,3 @@ def test_geo_intersection_with_similar_geometry_objects(): assert expected.iloc[0].equals(bf_result.iloc[0]) assert expected.iloc[1].equals(bf_result.iloc[1]) assert expected.iloc[2].equals(bf_result.iloc[2]) - - -def test_geo_to_wkt(): - bf_geo = bigframes.geopandas.GeoSeries( - [ - Point(0, 1), - Point(2, 4), - Point(5, 3), - Point(6, 8), - ] - ) - - pd_geo = geopandas.GeoSeries( - [ - Point(0, 1), - Point(2, 4), - Point(5, 3), - Point(6, 8), - ] - ) - - # Test was failing before using str.replace because the pd_result had extra - # whitespace "POINT (0 1)" while bf_result had none "POINT(0 1)". - # str.replace replaces any encountered whitespaces with none. - bf_result = ( - bf_geo.to_wkt().astype("string[pyarrow]").to_pandas().str.replace(" ", "") - ) - - pd_result = pd_geo.to_wkt().astype("string[pyarrow]").str.replace(" ", "") - - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_index=False, - ) - - -def test_geo_x(urban_areas_dfs): - bf_ua, pd_ua = urban_areas_dfs - bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo - pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) - bf_result = bf_series.x.to_pandas() - pd_result = pd_series.x - - # We need to use the original assert_series_equal for this test as pd.testing.assert_series_equal - # does not support the pd_result.astype(pd.Float64Dtype()), bf_result, combination - from tests.system.utils import ( # noqa: F811 - Reimport for this specific case - assert_series_equal, - ) - - assert_series_equal( - pd_result.astype(pd.Float64Dtype()), - bf_result, - ) - - -def test_geo_x_non_point(urban_areas_dfs): - bf_ua, _ = urban_areas_dfs - bf_series: bigframes.geopandas.GeoSeries = bf_ua["urban_area_geom"].geo - - with pytest.raises(google.api_core.exceptions.BadRequest, match="ST_X"): - bf_series.x.to_pandas() - - -def test_geo_y(urban_areas_dfs): - bf_ua, pd_ua = urban_areas_dfs - bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo - pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"]) - bf_result = bf_series.y.to_pandas() - pd_result = pd_series.y - from tests.system.utils import ( # noqa: F811 - Reimport for this specific case - assert_series_equal, - ) - - assert_series_equal( - pd_result.astype(pd.Float64Dtype()), - bf_result, - ) From 480e18a2ceefa8299d2772651a18dca8c248092e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 9 Jun 2025 09:36:50 -0500 Subject: [PATCH 9/9] Update bigframes/bigquery/_operations/geo.py --- bigframes/bigquery/_operations/geo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 3fff555fc0..bdc85eed9f 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -468,7 +468,6 @@ def st_length( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> from shapely.geometry import Polygon, LineString, Point, GeometryCollection >>> bpd.options.display.progress_bar = None