From 28279bb14817d231867372cdb7df59a760ab6706 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 3 Sep 2024 22:39:01 +0000 Subject: [PATCH 1/7] feat: define list accessor for bigframes Series --- bigframes/operations/_op_converters.py | 37 ++++++++++ bigframes/operations/lists.py | 43 +++++++++++ bigframes/operations/strings.py | 24 +----- bigframes/series.py | 5 ++ tests/system/small/operations/test_lists.py | 74 +++++++++++++++++++ .../pandas/core/arrays/arrow/accessors.py | 73 ++++++++++++++++++ 6 files changed, 235 insertions(+), 21 deletions(-) create mode 100644 bigframes/operations/_op_converters.py create mode 100644 bigframes/operations/lists.py create mode 100644 tests/system/small/operations/test_lists.py diff --git a/bigframes/operations/_op_converters.py b/bigframes/operations/_op_converters.py new file mode 100644 index 0000000000..3ebf22bcb6 --- /dev/null +++ b/bigframes/operations/_op_converters.py @@ -0,0 +1,37 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import bigframes.operations as ops + + +def convert_index(key: int) -> ops.ArrayIndexOp: + if key < 0: + raise NotImplementedError("Negative indexing is not supported.") + return ops.ArrayIndexOp(index=key) + + +def convert_slice(key: slice) -> ops.ArraySliceOp: + if key.step is not None and key.step != 1: + raise NotImplementedError(f"Only a step of 1 is allowed, got {key.step}") + + if (key.start is not None and key.start < 0) or ( + key.stop is not None and key.stop < 0 + ): + raise NotImplementedError("Slicing with negative numbers is not allowed.") + + return ops.ArraySliceOp( + start=key.start if key.start is not None else 0, + stop=key.stop, + step=key.step, + ) diff --git a/bigframes/operations/lists.py b/bigframes/operations/lists.py new file mode 100644 index 0000000000..00ea2bbd18 --- /dev/null +++ b/bigframes/operations/lists.py @@ -0,0 +1,43 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from typing import Union + +import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors + +from bigframes.core import log_adapter +import bigframes.operations as ops +from bigframes.operations._op_converters import convert_index, convert_slice +import bigframes.operations.base +import bigframes.series as series + + +@log_adapter.class_logger +class ListAccessor( + bigframes.operations.base.SeriesMethods, vendoracessors.ListAccessor +): + __doc__ = vendoracessors.ListAccessor.__doc__ + + def len(self): + return self._apply_unary_op(ops.len_op) + + def __getitem__(self, key: Union[int, slice]) -> series.Series: + if isinstance(key, int): + return self._apply_unary_op(convert_index(key)) + elif isinstance(key, slice): + return self._apply_unary_op(convert_slice(key)) + else: + raise ValueError(f"key must be an int or slice, got {type(key).__name__}") diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index d3e9c7edc6..4af142e0d5 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -23,6 +23,7 @@ from bigframes.core import log_adapter import bigframes.dataframe as df import bigframes.operations as ops +from bigframes.operations._op_converters import convert_index, convert_slice import bigframes.operations.base import bigframes.series as series @@ -40,28 +41,9 @@ class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMet def __getitem__(self, key: Union[int, slice]) -> series.Series: if isinstance(key, int): - if key < 0: - raise NotImplementedError("Negative indexing is not supported.") - return self._apply_unary_op(ops.ArrayIndexOp(index=key)) + return self._apply_unary_op(convert_index(key)) elif isinstance(key, slice): - if key.step is not None and key.step != 1: - raise NotImplementedError( - f"Only a step of 1 is allowed, got {key.step}" - ) - if (key.start is not None and key.start < 0) or ( - key.stop is not None and 
key.stop < 0 - ): - raise NotImplementedError( - "Slicing with negative numbers is not allowed." - ) - - return self._apply_unary_op( - ops.ArraySliceOp( - start=key.start if key.start is not None else 0, - stop=key.stop, - step=key.step, - ) - ) + return self._apply_unary_op(convert_slice(key)) else: raise ValueError(f"key must be an int or slice, got {type(key).__name__}") diff --git a/bigframes/series.py b/bigframes/series.py index a166680f85..ba07064a19 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -53,6 +53,7 @@ import bigframes.operations.aggregations as agg_ops import bigframes.operations.base import bigframes.operations.datetimes as dt +import bigframes.operations.lists as lists import bigframes.operations.plotting as plotting import bigframes.operations.strings as strings import bigframes.operations.structs as structs @@ -161,6 +162,10 @@ def query_job(self) -> Optional[bigquery.QueryJob]: def struct(self) -> structs.StructAccessor: return structs.StructAccessor(self._block) + @property + def list(self) -> lists.ListAccessor: + return lists.ListAccessor(self._block) + @property @validations.requires_ordering() def T(self) -> Series: diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py new file mode 100644 index 0000000000..ab71e71751 --- /dev/null +++ b/tests/system/small/operations/test_lists.py @@ -0,0 +1,74 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pandas as pd +import pyarrow as pa +import pytest + +import bigframes.pandas as bpd + +from ...utils import assert_series_equal + + +@pytest.mark.parametrize( + ("key"), + [ + pytest.param(0, id="int"), + pytest.param(slice(None, None, None), id="default_start_slice"), + pytest.param(slice(0, None, 1), id="default_stop_slice"), + pytest.param(slice(0, 2, None), id="default_step_slice"), + ], +) +def test_getitem(key): + data = [[1], [2, 3], [4, 5, 6]] + s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + + bf_result = s.list[key].to_pandas() + pd_result = pd_s.list[key] + + assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + + +@pytest.mark.parametrize( + ("key", "expectation"), + [ + # Negative index + (-1, pytest.raises(NotImplementedError)), + # Slice with negative start + (slice(-1, None, None), pytest.raises(NotImplementedError)), + # Slice with negative end + (slice(0, -1, None), pytest.raises(NotImplementedError)), + # Slice with step not equal to 1 + (slice(0, 2, 2), pytest.raises(NotImplementedError)), + ], +) +def test_getitem_notsupported(key, expectation): + data = [[1], [2, 3], [4, 5, 6]] + s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + + with expectation as e: + assert s.list[key] == e + + +def test_len(): + data = [[], [1], [1, 2], [1, 2, 3]] + s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + + bf_result = s.list.len().to_pandas() + pd_result = pd_s.list.len() + + assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index ab199d53bd..5a018ac7c7 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ 
b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -6,6 +6,79 @@ from bigframes import constants +class ListAccessor: + """ + Accessor object for list data properties of the Series values. + """ + + def len(self): + """ + Return the length of each list in the Series. + + Returns + ------- + Series + The length of each list. + + See Also + -------- + str.len : Python built-in function returning the length of an object. + Series.size : Returns the length of the Series. + StringMethods.len : Compute the length of each element in the Series/Index. + + Examples + -------- + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... [1, 2, 3], + ... [3], + ... ], + ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), + ... ) + >>> s.list.len() + 0 3 + 1 1 + dtype: int32[pyarrow] + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def __getitem__(self, key: int | slice): + """ + Index or slice lists in the Series. + + Parameters + ---------- + key : int | slice + Index or slice of indices to access from each list. + + Returns + ------- + Series + The list at requested index. + + Examples + -------- + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... [1, 2, 3], + ... [3], + ... ], + ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), + ... ) + >>> s.list[0] + 0 1 + 1 3 + dtype: int64[pyarrow] + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + class StructAccessor: """ Accessor object for structured data properties of the Series values. 
From 404e940663d3c1fbd9f61447b1aaf7d0e2bb7c99 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 4 Sep 2024 17:23:38 +0000 Subject: [PATCH 2/7] Add doc for list accessor --- docs/reference/bigframes.pandas/series.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/reference/bigframes.pandas/series.rst b/docs/reference/bigframes.pandas/series.rst index f14eb8e862..30cf851de7 100644 --- a/docs/reference/bigframes.pandas/series.rst +++ b/docs/reference/bigframes.pandas/series.rst @@ -35,6 +35,14 @@ String handling :inherited-members: :undoc-members: +List handling +^^^^^^^^^^^^^ + +.. automodule:: bigframes.operations.lists + :members: + :inherited-members: + :undoc-members: + Struct handling ^^^^^^^^^^^^^^^ From 003247a73c0706559b823429cc38fa0a595ad9e6 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 4 Sep 2024 19:18:36 +0000 Subject: [PATCH 3/7] Fix bug in docstring and inheritance --- bigframes/operations/lists.py | 3 +++ bigframes/series.py | 4 +++- .../pandas/core/arrays/arrow/accessors.py | 8 ++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/bigframes/operations/lists.py b/bigframes/operations/lists.py index 00ea2bbd18..16c22dfb2a 100644 --- a/bigframes/operations/lists.py +++ b/bigframes/operations/lists.py @@ -14,6 +14,7 @@ from __future__ import annotations +import inspect from typing import Union import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors @@ -41,3 +42,5 @@ def __getitem__(self, key: Union[int, slice]) -> series.Series: return self._apply_unary_op(convert_slice(key)) else: raise ValueError(f"key must be an int or slice, got {type(key).__name__}") + + __getitem__.__doc__ = inspect.getdoc(vendoracessors.ListAccessor.__getitem__) diff --git a/bigframes/series.py b/bigframes/series.py index ba07064a19..da5ca55023 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -67,6 +67,8 @@ " Try converting it to a remote function." 
) +_list = list # Type alias to escape Series.list property + @log_adapter.class_logger class Series(bigframes.operations.base.SeriesMethods, vendored_pandas_series.Series): @@ -1713,7 +1715,7 @@ def to_latex( buf, columns=columns, header=header, index=index, **kwargs ) - def tolist(self) -> list: + def tolist(self) -> _list: return self.to_pandas().to_list() to_list = tolist diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index 5a018ac7c7..85bb58a674 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -36,12 +36,12 @@ def len(self): ... [1, 2, 3], ... [3], ... ], - ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), + ... dtype=bpd.ArrowDtype(pa.list_(pa.int64())), ... ) >>> s.list.len() 0 3 1 1 - dtype: int32[pyarrow] + dtype: Int64 """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -69,12 +69,12 @@ def __getitem__(self, key: int | slice): ... [1, 2, 3], ... [3], ... ], - ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), + ... dtype=bpd.ArrowDtype(pa.list_(pa.int64())), ... 
) >>> s.list[0] 0 1 1 3 - dtype: int64[pyarrow] + dtype: Int64 """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 098b62003b2c2c54f4f54b051e62ae78ad1bb641 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 4 Sep 2024 20:52:30 +0000 Subject: [PATCH 4/7] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/series.py b/bigframes/series.py index da5ca55023..5192a9cf49 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -67,7 +67,7 @@ " Try converting it to a remote function." ) -_list = list # Type alias to escape Series.list property +_list = list # Type alias to escape Series.list property @log_adapter.class_logger From 883eed14f6fdda89950498b6bfb383a1740fca7b Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 4 Sep 2024 22:36:51 +0000 Subject: [PATCH 5/7] Skip test if Pandas version is too old --- tests/system/small/operations/test_lists.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/system/small/operations/test_lists.py b/tests/system/small/operations/test_lists.py index ab71e71751..7ecf79dc6a 100644 --- a/tests/system/small/operations/test_lists.py +++ b/tests/system/small/operations/test_lists.py @@ -13,6 +13,7 @@ # limitations under the License. 
+import packaging.version import pandas as pd import pyarrow as pa import pytest @@ -32,6 +33,10 @@ ], ) def test_getitem(key): + if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"): + pytest.skip( + "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data" + ) data = [[1], [2, 3], [4, 5, 6]] s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) @@ -64,6 +69,10 @@ def test_getitem_notsupported(key, expectation): def test_len(): + if packaging.version.Version(pd.__version__) < packaging.version.Version("2.2.0"): + pytest.skip( + "https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#series-list-accessor-for-pyarrow-list-data" + ) data = [[], [1], [1, 2], [1, 2, 3]] s = bpd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) pd_s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) From 72199d824166555d2d93aec82a38582ed29a3014 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 5 Sep 2024 06:00:45 +0000 Subject: [PATCH 6/7] Fix docstring format, and provide notebook examples. 
--- .../dataframes/struct_and_array_dtypes.ipynb | 88 +++++++++++--- .../pandas/core/arrays/arrow/accessors.py | 115 ++++++++---------- 2 files changed, 121 insertions(+), 82 deletions(-) diff --git a/notebooks/dataframes/struct_and_array_dtypes.ipynb b/notebooks/dataframes/struct_and_array_dtypes.ipynb index 3bcdaf40f7..def65ee6ca 100644 --- a/notebooks/dataframes/struct_and_array_dtypes.ipynb +++ b/notebooks/dataframes/struct_and_array_dtypes.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Copyright 2023 Google LLC\n", + "# Copyright 2024 Google LLC\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", @@ -212,6 +212,54 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3\n", + "1 2\n", + "2 4\n", + "Name: Scores, dtype: Int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Find the length of each array with list accessor\n", + "df['Scores'].list.len()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 88\n", + "1 81\n", + "2 89\n", + "Name: Scores, dtype: Int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Find the second element in each array with list accessor\n", + "df['Scores'].list[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, "outputs": [ { "data": { @@ -228,7 +276,7 @@ "Name: Scores, dtype: Int64" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -243,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -261,7 +309,7 @@ "Name: Scores, dtype: Float64" ] }, - "execution_count": 8, + "execution_count": 
10, "metadata": {}, "output_type": "execute_result" } @@ -274,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -286,7 +334,7 @@ "Name: Scores, dtype: list[pyarrow]" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -299,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -361,7 +409,7 @@ "[3 rows x 3 columns]" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -394,14 +442,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/chelsealin/src/bigframes/venv/lib/python3.12/site-packages/google/cloud/bigquery/_pandas_helpers.py:570: UserWarning: Pyarrow could not determine the type of columns: bigframes_unnamed_index.\n", + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/venv/lib/python3.11/site-packages/google/cloud/bigquery/_pandas_helpers.py:570: UserWarning: Pyarrow could not determine the type of columns: bigframes_unnamed_index.\n", " warnings.warn(\n" ] }, @@ -460,7 +508,7 @@ "[3 rows x 2 columns]" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -483,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -494,7 +542,7 @@ "dtype: object" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -514,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -525,7 +573,7 @@ "dtype: object" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -537,7 +585,7 @@ }, { 
"cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -549,7 +597,7 @@ "Name: City, dtype: string" ] }, - "execution_count": 14, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -562,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -620,7 +668,7 @@ "[3 rows x 2 columns]" ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -648,7 +696,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index 85bb58a674..799f27ce0a 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -7,74 +7,65 @@ class ListAccessor: - """ - Accessor object for list data properties of the Series values. - """ + """Accessor object for list data properties of the Series values.""" def len(self): - """ - Return the length of each list in the Series. - - Returns - ------- - Series - The length of each list. - - See Also - -------- - str.len : Python built-in function returning the length of an object. - Series.size : Returns the length of the Series. - StringMethods.len : Compute the length of each element in the Series/Index. - - Examples - -------- - >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series( - ... [ - ... [1, 2, 3], - ... [3], - ... ], - ... dtype=bpd.ArrowDtype(pa.list_(pa.int64())), - ... ) - >>> s.list.len() - 0 3 - 1 1 - dtype: Int64 + """Compute the length of each list in the Series. 
+ + See Also: + StringMethods.len : Compute the length of each element in the Series/Index. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... [1, 2, 3], + ... [3], + ... ], + ... dtype=bpd.ArrowDtype(pa.list_(pa.int64())), + ... ) + >>> s.list.len() + 0 3 + 1 1 + dtype: Int64 + + Returns: + bigframes.series.Series: A Series or Index of integer values indicating + the length of each element in the Series or Index. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def __getitem__(self, key: int | slice): - """ - Index or slice lists in the Series. - - Parameters - ---------- - key : int | slice - Index or slice of indices to access from each list. - - Returns - ------- - Series - The list at requested index. - - Examples - -------- - >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series( - ... [ - ... [1, 2, 3], - ... [3], - ... ], - ... dtype=bpd.ArrowDtype(pa.list_(pa.int64())), - ... ) - >>> s.list[0] - 0 1 - 1 3 - dtype: Int64 + """Index or slice lists in the Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... [1, 2, 3], + ... [3], + ... ], + ... dtype=bpd.ArrowDtype(pa.list_(pa.int64())), + ... ) + >>> s.list[0] + 0 1 + 1 3 + dtype: Int64 + + Args: + key (int | slice): Index or slice of indices to access from each list. + For integer indices, only non-negative values are accepted. For + slices, you must use a non-negative start, a non-negative end, and + a step of 1. + + Returns: + bigframes.series.Series: The element (int key) or sub-list (slice key) taken from each list. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 850a42e607c761d7e6bd09b408f3a40746b4e612 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 5 Sep 2024 17:44:45 +0000 Subject: [PATCH 7/7] Use func link under see also --- .../bigframes_vendored/pandas/core/arrays/arrow/accessors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index 799f27ce0a..771146250a 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -12,8 +12,9 @@ class ListAccessor: def len(self): """Compute the length of each list in the Series. - See Also: - StringMethods.len : Compute the length of each element in the Series/Index. + **See Also:** + + - :func:`StringMethods.len` : Compute the length of each element in the Series/Index. **Examples:**