From 1b60fb79c9012c5c9eb411a7a3b4ed0fd5f22f55 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 9 Aug 2024 05:38:14 -1000 Subject: [PATCH] Disallow GeoSeries from accepting a column in favor of `_from_column` (#1434) closes #1433 Mirroring the upstream cudf changes in https://github.com/rapidsai/cudf/pull/16454, `cudf.Series` disallows accepting a `ColumnBase` in favor of the `cudf.Series._from_column` constructor. This PR does the same for `GeoSeries` as well as addresses the breakages due to the upstream cudf change. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Mark Harris (https://github.com/harrism) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuspatial/pull/1434 --- .../cuspatial/core/_column/geocolumn.py | 34 ++++++------- .../cuspatial/core/_column/geometa.py | 36 +++++++++++--- .../core/binops/distance_dispatch.py | 4 +- .../cuspatial/core/binops/equals_count.py | 4 +- .../cuspatial/core/binops/intersection.py | 12 ++--- .../cuspatial/core/binpreds/contains.py | 2 +- .../binpreds/contains_geometry_processor.py | 4 +- .../core/binpreds/feature_contains.py | 4 +- .../cuspatial/core/binpreds/feature_equals.py | 6 +-- .../core/binpreds/feature_intersects.py | 4 +- .../core/binpreds/feature_touches.py | 4 +- .../cuspatial/cuspatial/core/geodataframe.py | 9 ++-- python/cuspatial/cuspatial/core/geoseries.py | 48 ++++++++++--------- .../cuspatial/core/spatial/distance.py | 34 ++++++------- .../cuspatial/core/spatial/indexing.py | 4 +- python/cuspatial/cuspatial/core/trajectory.py | 4 +- .../cuspatial/io/geopandas_reader.py | 17 +++++-- .../cuspatial/tests/test_from_geopandas.py | 28 +++++++---- .../cuspatial/utils/binpred_utils.py | 9 +++- 19 files changed, 159 insertions(+), 108 deletions(-) diff --git a/python/cuspatial/cuspatial/core/_column/geocolumn.py b/python/cuspatial/cuspatial/core/_column/geocolumn.py index 5ad1ffb55..d32d75465 100644 --- a/python/cuspatial/cuspatial/core/_column/geocolumn.py +++ b/python/cuspatial/cuspatial/core/_column/geocolumn.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023 NVIDIA CORPORATION +# Copyright (c) 2021-2024, NVIDIA CORPORATION from enum import Enum from functools import cached_property @@ -153,14 +153,14 @@ def _from_points_xy(cls, points_xy: ColumnBase): coord_dtype = points_xy.dtype return cls( ( - cudf.Series(point_col), - cudf.Series( + cudf.Series._from_column(point_col), + cudf.Series._from_column( empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype) ), - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype) ), - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POLYGON, coord_dtype) ), ), @@ -205,14 +205,14 @@ def _from_multipoints_xy( return cls( ( - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POINT, coord_dtype) ), - cudf.Series(multipoint_col), - cudf.Series( + cudf.Series._from_column(multipoint_col), + cudf.Series._from_column( empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype) ), - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POLYGON, coord_dtype) ), ), @@ -265,14 +265,14 @@ def _from_linestrings_xy( return cls( ( - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POINT, coord_dtype) ), - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype) ), - cudf.Series(linestrings_col), - cudf.Series( + cudf.Series._from_column(linestrings_col), + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POLYGON, coord_dtype) ), ), @@ -331,16 +331,16 @@ def _from_polygons_xy( return cls( ( - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POINT, coord_dtype) ), - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype) ), - cudf.Series( + cudf.Series._from_column( empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype) ), - cudf.Series(polygons_col), + cudf.Series._from_column(polygons_col), ), meta, ) diff --git a/python/cuspatial/cuspatial/core/_column/geometa.py b/python/cuspatial/cuspatial/core/_column/geometa.py index 9f794ab16..2158b02b2 100644 --- a/python/cuspatial/cuspatial/core/_column/geometa.py +++ b/python/cuspatial/cuspatial/core/_column/geometa.py @@ -1,12 +1,13 @@ -# Copyright (c) 2021-2022 NVIDIA CORPORATION +# Copyright (c) 2021-2024, NVIDIA CORPORATION # This allows GeoMeta as its own init type from __future__ import annotations from enum import Enum -from typing import Union +from typing import Literal, Union import cudf +import cudf.core.column # This causes arrow to encode NONE as =255, which I'll accept now @@ -26,12 +27,33 @@ class GeoMeta: GeoSeries if necessary. """ - def __init__(self, meta: Union[GeoMeta, dict]): + def __init__( + self, + meta: Union[ + GeoMeta, + dict[ + Literal["input_types", "union_offsets"], + cudf.core.column.ColumnBase, + ], + ], + ): if isinstance(meta, dict): - self.input_types = cudf.Series(meta["input_types"], dtype="int8") - self.union_offsets = cudf.Series( - meta["union_offsets"], dtype="int32" - ) + meta_it = meta["input_types"] + if isinstance(meta_it, cudf.core.column.ColumnBase): + self.input_types = cudf.Series._from_column(meta_it).astype( + "int8" + ) + else: + # Could be Series from GeoSeries.__getitem__ + self.input_types = cudf.Series(meta_it, dtype="int8") + meta_uo = meta["union_offsets"] + if isinstance(meta_uo, cudf.core.column.ColumnBase): + self.union_offsets = cudf.Series._from_column(meta_uo).astype( + "int32" + ) + else: + # Could be Series from GeoSeries.__getitem__ + self.union_offsets = cudf.Series(meta_uo, dtype="int32") else: self.input_types = cudf.Series(meta.input_types, dtype="int8") self.union_offsets = cudf.Series(meta.union_offsets, dtype="int32") diff --git a/python/cuspatial/cuspatial/core/binops/distance_dispatch.py b/python/cuspatial/cuspatial/core/binops/distance_dispatch.py index db2e6339b..d55822ab8 100644 --- a/python/cuspatial/cuspatial/core/binops/distance_dispatch.py +++ b/python/cuspatial/cuspatial/core/binops/distance_dispatch.py @@ -1,3 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION + import cudf from cudf.core.column import as_column @@ -200,4 +202,4 @@ def __call__(self): # If `align==False`, geopandas preserves lhs index. index = None if self._align else self._res_index - return cudf.Series(result, index=index, nan_as_null=False) + return cudf.Series._from_column(result, index=index) diff --git a/python/cuspatial/cuspatial/core/binops/equals_count.py b/python/cuspatial/cuspatial/core/binops/equals_count.py index 83cd97b9e..b86358f48 100644 --- a/python/cuspatial/cuspatial/core/binops/equals_count.py +++ b/python/cuspatial/cuspatial/core/binops/equals_count.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import cudf @@ -76,4 +76,4 @@ def pairwise_multipoint_equals_count(lhs, rhs): rhs_column = rhs._column.mpoints._column result = c_pairwise_multipoint_equals_count(lhs_column, rhs_column) - return cudf.Series(result) + return cudf.Series._from_column(result) diff --git a/python/cuspatial/cuspatial/core/binops/intersection.py b/python/cuspatial/cuspatial/core/binops/intersection.py index 06e23a4e8..afc85956a 100644 --- a/python/cuspatial/cuspatial/core/binops/intersection.py +++ b/python/cuspatial/cuspatial/core/binops/intersection.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from typing import TYPE_CHECKING @@ -109,15 +109,15 @@ def pairwise_linestring_intersection( ) from cuspatial.core.geoseries import GeoSeries - geometries = GeoSeries( + geometries = GeoSeries._from_column( GeoColumn( ( - cudf.Series(points), - cudf.Series( + cudf.Series._from_column(points), + cudf.Series._from_column( empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype) ), - cudf.Series(linestring_column), - cudf.Series( + cudf.Series._from_column(linestring_column), + cudf.Series._from_column( empty_geometry_column(Feature_Enum.POLYGON, coord_dtype) ), ), diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index c66afd4ed..f73b2f159 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -155,7 +155,7 @@ def _pairwise_contains_properly(points, polygons): # point) pair where the point is contained properly by the polygon. We can # use this to create a dataframe with only (polygon, point) pairs that # satisfy the relationship. - pip_result = cudf.Series(result_column, dtype="bool") + pip_result = cudf.Series._from_column(result_column).astype("bool") trues = pip_result[pip_result].index true_pairs = cudf.DataFrame( { diff --git a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py index 8ff5dd659..add5f6cfd 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py @@ -62,7 +62,9 @@ def _preprocess_multipoint_rhs(self, lhs, rhs): point_indices = geom.point_indices() from cuspatial.core.geoseries import GeoSeries - final_rhs = GeoSeries(GeoColumn._from_points_xy(xy_points._column)) + final_rhs = GeoSeries._from_column( + GeoColumn._from_points_xy(xy_points._column) + ) preprocess_result = PreprocessorResult( lhs, rhs, final_rhs, point_indices ) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py index 0a4824dd2..8bf915cd4 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from typing import TypeVar @@ -71,7 +71,7 @@ def _intersection_results_for_contains_polygon(self, lhs, rhs): if len(pli_features) == 0: return _zero_series(len(lhs)) - pli_offsets = cudf.Series(pli[0]) + pli_offsets = cudf.Series._from_column(pli[0]) # Convert the pli to multipoints for equality checking multipoints = _points_and_lines_to_multipoints( diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_equals.py b/python/cuspatial/cuspatial/core/binpreds/feature_equals.py index 0bf109980..37d9a9310 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_equals.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_equals.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -236,7 +236,7 @@ def _preprocess(self, lhs: "GeoSeries", rhs: "GeoSeries"): lhs, rhs, PreprocessorResult(None, rhs.point_indices) ) - def _vertices_equals(self, lhs: Series, rhs: Series): + def _vertices_equals(self, lhs: Series, rhs: Series) -> Series: """Compute the equals relationship between interleaved xy coordinate buffers.""" if not isinstance(lhs, Series): @@ -246,7 +246,7 @@ def _vertices_equals(self, lhs: Series, rhs: Series): length = min(len(lhs), len(rhs)) a = lhs[:length:2]._column == rhs[:length:2]._column b = rhs[1:length:2]._column == lhs[1:length:2]._column - return a & b + return Series._from_column(a & b) def _compute_predicate(self, lhs, rhs, preprocessor_result): """Perform the binary predicate operation on the input GeoSeries. diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index 25c463b7c..6d33be4b5 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import cupy as cp @@ -70,7 +70,7 @@ def _get_intersecting_geometry_indices(self, lhs, op_result): a set of lengths from the returned offsets buffer, then returns an integer index for all of the offset sizes that are larger than 0.""" - is_offsets = cudf.Series(op_result.result[0]) + is_offsets = cudf.Series._from_column(op_result.result[0]) is_sizes = is_offsets[1:].reset_index(drop=True) - is_offsets[ :-1 ].reset_index(drop=True) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_touches.py b/python/cuspatial/cuspatial/core/binpreds/feature_touches.py index 3071fdf7a..ea1387951 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_touches.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_touches.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import cupy as cp @@ -66,7 +66,7 @@ def _preprocess(self, lhs, rhs): # First compute pli which will contain points for line crossings and # linestrings for overlapping segments. pli = _basic_intersects_pli(lhs, rhs) - offsets = cudf.Series(pli[0]) + offsets = cudf.Series._from_column(pli[0]) pli_geometry_count = offsets[1:].reset_index(drop=True) - offsets[ :-1 ].reset_index(drop=True) diff --git a/python/cuspatial/cuspatial/core/geodataframe.py b/python/cuspatial/cuspatial/core/geodataframe.py index 79d2de665..ef968e38e 100644 --- a/python/cuspatial/cuspatial/core/geodataframe.py +++ b/python/cuspatial/cuspatial/core/geodataframe.py @@ -201,7 +201,10 @@ def _apply_boolean_mask(self, mask: BooleanMask, keep_index=True) -> T: data = data_columns._apply_boolean_mask(mask, keep_index) geo = GeoDataFrame( - {name: geo_columns[name][mask.column] for name in geo_columns} + { + name: geo_columns[name][cudf.Index._from_column(mask.column)] + for name in geo_columns + } ) res = self._from_data(self._recombine_columns(geo, data)) @@ -319,9 +322,9 @@ class _GeoSeriesUtility: def _from_data(cls, new_data, name=None, index=None): new_column = new_data.columns[0] if is_geometry_type(new_column): - return GeoSeries(new_column, name=name, index=index) + return GeoSeries._from_column(new_column, name=name, index=index) else: - return cudf.Series(new_column, name=name, index=index) + return cudf.Series._from_column(new_column, name=name, index=index) def is_geometry_type(obj): diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 10addfec0..b8d7c4945 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -77,7 +77,7 @@ def __init__( data: Optional[ Union[gpd.GeoSeries, Tuple, T, pd.Series, GeoColumn, list] ], - index: Union[cudf.Index, pd.Index] = None, + index: Union[cudf.Index, pd.Index, None] = None, dtype=None, name=None, nan_as_null=True, @@ -86,29 +86,29 @@ def __init__( if data is None or isinstance(data, (pd.Series, list)): data = gpGeoSeries(data) # Create column - if isinstance(data, GeoColumn): - column = data - elif isinstance(data, GeoSeries): - column = data._column + if isinstance(data, GeoSeries): + gser = type(self)._from_column( + data._column, index=data.index, name=data.name + ) elif isinstance(data, gpGeoSeries): from cuspatial.io.geopandas_reader import GeoPandasReader adapter = GeoPandasReader(data) pandas_meta = GeoMeta(adapter.get_geopandas_meta()) - column = GeoColumn(adapter._get_geotuple(), pandas_meta) + geocolumn = GeoColumn(adapter._get_geotuple(), pandas_meta) + gser = type(self)._from_column( + geocolumn, index=cudf.Index(data.index), name=data.name + ) else: raise TypeError( f"Incompatible object passed to GeoSeries ctor {type(data)}" ) # Condition index - if isinstance(data, (gpGeoSeries, GeoSeries)): - if index is None: - index = data.index - if index is None: - index = cudf.RangeIndex(0, len(column)) - super().__init__( - column, index, dtype=dtype, name=name, nan_as_null=nan_as_null - ) + if index is not None: + gser.index = cudf.Index(index) + if name is not None: + gser.name = name + super().__init__(gser, dtype=dtype, nan_as_null=nan_as_null) @property def feature_types(self): @@ -435,9 +435,11 @@ def __getitem__(self, indexes): ) if isinstance(indexes, Integral): - return GeoSeries(column, name=self._sr.name).to_shapely() + return GeoSeries._from_column( + column, name=self._sr.name + ).to_shapely() else: - return GeoSeries( + return GeoSeries._from_column( column, index=self._sr.index[indexes], name=self._sr.name ) @@ -454,7 +456,7 @@ def from_arrow(union): "union_offsets": union.offsets, }, ) - return GeoSeries(column) + return GeoSeries._from_column(column) @property def loc(self): @@ -636,7 +638,7 @@ def to_arrow(self): return pa.UnionArray.from_dense( self._column._meta.input_types.to_arrow(), - self._column._meta.union_offsets.to_arrow(), + self._column._meta.union_offsets.astype("int32").to_arrow(), [ arrow_points, arrow_mpoints, @@ -681,7 +683,7 @@ def _align_to_index( "union_offsets": aligned_union_offsets, }, ) - return GeoSeries(column) + return GeoSeries._from_column(column) @classmethod def from_points_xy(cls, points_xy): @@ -700,7 +702,7 @@ def from_points_xy(cls, points_xy): """ coords_dtype = _check_coords_dtype(points_xy) - return cls( + return cls._from_column( GeoColumn._from_points_xy(as_column(points_xy, dtype=coords_dtype)) ) @@ -735,7 +737,7 @@ def from_multipoints_xy(cls, multipoints_xy, geometry_offset): dtype: geometry """ coords_dtype = coords_dtype = _check_coords_dtype(multipoints_xy) - return cls( + return cls._from_column( GeoColumn._from_multipoints_xy( as_column(multipoints_xy, dtype=coords_dtype), as_column(geometry_offset, dtype="int32"), @@ -781,7 +783,7 @@ def from_linestrings_xy( dtype: geometry """ coords_dtype = _check_coords_dtype(linestrings_xy) - return cls( + return cls._from_column( GeoColumn._from_linestrings_xy( as_column(linestrings_xy, dtype=coords_dtype), as_column(part_offset, dtype="int32"), @@ -831,7 +833,7 @@ def from_polygons_xy( dtype: geometry """ coords_dtype = _check_coords_dtype(polygons_xy) - return cls( + return cls._from_column( GeoColumn._from_polygons_xy( as_column(polygons_xy, dtype=coords_dtype), as_column(ring_offset, dtype="int32"), diff --git a/python/cuspatial/cuspatial/core/spatial/distance.py b/python/cuspatial/cuspatial/core/spatial/distance.py index f9a0b1d6b..0a95baeba 100644 --- a/python/cuspatial/cuspatial/core/spatial/distance.py +++ b/python/cuspatial/cuspatial/core/spatial/distance.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. import cudf from cudf import DataFrame, Series @@ -217,15 +217,13 @@ def pairwise_point_distance(points1: GeoSeries, points2: GeoSeries): rhs_point_collection_type, ) = _extract_point_column_and_collection_type(points2) - return Series._from_data( - { - None: cpp_pairwise_point_distance( - lhs_point_collection_type, - rhs_point_collection_type, - lhs_column, - rhs_column, - ) - } + return Series._from_column( + cpp_pairwise_point_distance( + lhs_point_collection_type, + rhs_point_collection_type, + lhs_column, + rhs_column, + ) ) @@ -293,13 +291,11 @@ def pairwise_linestring_distance( if len(multilinestrings1) == 0: return cudf.Series(dtype="float64") - return Series._from_data( - { - None: cpp_pairwise_linestring_distance( - multilinestrings1.lines.column(), - multilinestrings2.lines.column(), - ) - } + return Series._from_column( + cpp_pairwise_linestring_distance( + multilinestrings1.lines.column(), + multilinestrings2.lines.column(), + ) ) @@ -583,8 +579,8 @@ def pairwise_linestring_polygon_distance( linestrings_column = linestrings.lines.column() polygon_column = polygons.polygons.column() - return Series._from_data( - {None: c_pairwise_line_poly_dist(linestrings_column, polygon_column)} + return Series._from_column( + c_pairwise_line_poly_dist(linestrings_column, polygon_column) ) diff --git a/python/cuspatial/cuspatial/core/spatial/indexing.py b/python/cuspatial/cuspatial/core/spatial/indexing.py index 52d63e274..a7116d435 100644 --- a/python/cuspatial/cuspatial/core/spatial/indexing.py +++ b/python/cuspatial/cuspatial/core/spatial/indexing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. import warnings @@ -187,4 +187,4 @@ def quadtree_on_points( max_depth, max_size, ) - return Series(key_to_point), DataFrame._from_data(*quadtree) + return Series._from_column(key_to_point), DataFrame._from_data(*quadtree) diff --git a/python/cuspatial/cuspatial/core/trajectory.py b/python/cuspatial/cuspatial/core/trajectory.py index be779313f..27fdb227c 100644 --- a/python/cuspatial/cuspatial/core/trajectory.py +++ b/python/cuspatial/cuspatial/core/trajectory.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import numpy as np @@ -71,7 +71,7 @@ def derive_trajectories(object_ids, points: GeoSeries, timestamps): objects, traj_offsets = cpp_derive_trajectories( object_ids, xs, ys, timestamps ) - return DataFrame._from_data(*objects), Series(data=traj_offsets) + return DataFrame._from_data(*objects), Series._from_column(traj_offsets) def trajectory_bounding_boxes(num_trajectories, object_ids, points: GeoSeries): diff --git a/python/cuspatial/cuspatial/io/geopandas_reader.py b/python/cuspatial/cuspatial/io/geopandas_reader.py index cca86448d..a4fc8091e 100644 --- a/python/cuspatial/cuspatial/io/geopandas_reader.py +++ b/python/cuspatial/cuspatial/io/geopandas_reader.py @@ -1,4 +1,8 @@ -# Copyright (c) 2020-2022 NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. + +from __future__ import annotations + +from typing import Literal from geopandas import GeoSeries as gpGeoSeries from shapely.geometry import ( @@ -12,6 +16,7 @@ ) import cudf +import cudf.core.column from cuspatial.core._column.geometa import Feature_Enum from cuspatial.io import pygeoarrow @@ -119,7 +124,11 @@ def _get_geotuple(self) -> cudf.Series: polygons, ) - def get_geopandas_meta(self) -> dict: + def get_geopandas_meta( + self, + ) -> dict[ + Literal["input_types", "union_offsets"], cudf.core.column.ColumnBase + ]: """ Returns the metadata that was created converting the GeoSeries into GeoArrow format. The metadata essentially contains the object order @@ -129,6 +138,6 @@ def get_geopandas_meta(self) -> dict: """ buffers = self.buffers return { - "input_types": buffers.type_codes, - "union_offsets": buffers.offsets, + "input_types": cudf.core.column.as_column(buffers.type_codes), + "union_offsets": cudf.core.column.as_column(buffers.offsets), } diff --git a/python/cuspatial/cuspatial/tests/test_from_geopandas.py b/python/cuspatial/cuspatial/tests/test_from_geopandas.py index 74675512c..1c33e216a 100644 --- a/python/cuspatial/cuspatial/tests/test_from_geopandas.py +++ b/python/cuspatial/cuspatial/tests/test_from_geopandas.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import geopandas as gpd import pandas as pd from shapely.geometry import ( @@ -63,7 +63,9 @@ def test_from_geopandas_multipoint(): cudf.Series([1.0, 2.0, 3.0, 4.0], dtype="float64"), ) cudf.testing.assert_series_equal( - cudf.Series(cugs._column.mpoints._column.base_children[0]), + cudf.Series._from_column( + cugs._column.mpoints._column.base_children[0] + ), cudf.Series([0, 2], dtype="int32"), ) @@ -75,7 +77,7 @@ def test_from_geopandas_linestring(): cugs.lines.xy, cudf.Series([4.0, 3.0, 2.0, 1.0], dtype="float64") ) cudf.testing.assert_series_equal( - cudf.Series(cugs._column.lines._column.base_children[0]), + cudf.Series._from_column(cugs._column.lines._column.base_children[0]), cudf.Series([0, 1], dtype="int32"), ) @@ -95,7 +97,7 @@ def test_from_geopandas_multilinestring(): cudf.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype="float64"), ) cudf.testing.assert_series_equal( - cudf.Series(cugs._column.lines._column.base_children[0]), + cudf.Series._from_column(cugs._column.lines._column.base_children[0]), cudf.Series([0, 2], dtype="int32"), ) @@ -112,11 +114,13 @@ def test_from_geopandas_polygon(): cudf.Series([0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0], dtype="float64"), ) cudf.testing.assert_series_equal( - cudf.Series(cugs._column.polygons._column.base_children[0]), + cudf.Series._from_column( + cugs._column.polygons._column.base_children[0] + ), cudf.Series([0, 1], dtype="int32"), ) cudf.testing.assert_series_equal( - cudf.Series( + cudf.Series._from_column( cugs._column.polygons._column.base_children[1].base_children[0] ), cudf.Series([0, 1], dtype="int32"), @@ -156,11 +160,13 @@ def test_from_geopandas_polygon_hole(): ), ) cudf.testing.assert_series_equal( - cudf.Series(cugs._column.polygons._column.base_children[0]), + cudf.Series._from_column( + cugs._column.polygons._column.base_children[0] + ), cudf.Series([0, 1], dtype="int32"), ) cudf.testing.assert_series_equal( - cudf.Series( + cudf.Series._from_column( cugs._column.polygons._column.base_children[1].base_children[0] ), cudf.Series([0, 2], dtype="int32"), @@ -204,11 +210,13 @@ def test_from_geopandas_multipolygon(): ), ) cudf.testing.assert_series_equal( - cudf.Series(cugs._column.polygons._column.base_children[0]), + cudf.Series._from_column( + cugs._column.polygons._column.base_children[0] + ), cudf.Series([0, 1], dtype="int32"), ) cudf.testing.assert_series_equal( - cudf.Series( + cudf.Series._from_column( cugs._column.polygons._column.base_children[1].base_children[0] ), cudf.Series([0, 2], dtype="int32"), diff --git a/python/cuspatial/cuspatial/utils/binpred_utils.py b/python/cuspatial/cuspatial/utils/binpred_utils.py index c8e2804eb..6a299840f 100644 --- a/python/cuspatial/cuspatial/utils/binpred_utils.py +++ b/python/cuspatial/cuspatial/utils/binpred_utils.py @@ -4,6 +4,7 @@ import numpy as np import cudf +import cudf.core.column import cuspatial from cuspatial.core._column.geocolumn import ColumnType @@ -425,7 +426,13 @@ def _pli_features_rebuild_offsets(pli, features): in_sizes = ( features.sizes if len(features) > 0 else _zero_series(len(pli[0]) - 1) ) - offsets = cudf.Series(pli[0]) + if isinstance(pli[0], cudf.core.column.ColumnBase): + offsets = cudf.Series._from_column(pli[0]) + else: + # TODO: Can be removed if pairwise_linestring_intersection + # always returns a cudf.Series + # in the first element + offsets = cudf.Series(pli[0]) offset_sizes = offsets[1:].reset_index(drop=True) - offsets[ :-1 ].reset_index(drop=True)