From 28273d6c88e7644aaa4e02667530b8ec39d979a5 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 16 Apr 2024 17:28:12 +0000
Subject: [PATCH 1/3] feat: Add transpose support for small homogeneously typed
 DataFrames.

---
 bigframes/core/blocks.py                      | 54 +++++++++++++
 bigframes/dataframe.py                        |  7 ++
 tests/system/small/test_dataframe.py          | 18 +++++
 .../bigframes_vendored/pandas/core/frame.py   | 76 +++++++++++++++++++
 4 files changed, 155 insertions(+)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 0f9cacd83d..9249923249 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -1542,6 +1542,13 @@ def melt(
         var_names=typing.Sequence[typing.Hashable],
         value_name: typing.Hashable = "value",
     ):
+        """
+        Unpivot columns to produce longer, narrower dataframe.
+        Arguments correspond to pandas.melt arguments.
+        id_vars: passthrough variables that will be repeated N times
+        value_vars: variables that will be melted together
+        var_names:
+        """
         # TODO: Implement col_level and ignore_index
         unpivot_col_id = guid.generate_guid()
         var_col_ids = tuple([guid.generate_guid() for _ in var_names])
@@ -1570,6 +1577,53 @@ def melt(
             index_columns=[index_id],
         )
 
+    def transpose(self) -> Block:
+        """Transpose the block. Will fail if dtypes incompatible or too many rows"""
+        original_col_index = self.column_labels
+        original_row_index = self.index.to_pandas()
+        original_row_count = len(original_row_index)
+        LIMIT = 9900
+        if original_row_count > LIMIT:
+            raise NotImplementedError(
+                f"Object has {original_row_count} rows and is too large to transpose."
+            )
+
+        # Add row numbers to both axes to disambiguate, clean them up later
+        block = self
+        numbered_block = block.with_column_labels(
+            utils.combine_indices(
+                block.column_labels, pd.Index(range(len(block.column_labels)))
+            )
+        )
+        numbered_block, offsets = numbered_block.promote_offsets()
+
+        stacked_block = numbered_block.melt(
+            id_vars=(offsets,),
+            var_names=(
+                *[name for name in original_col_index.names],
+                "col_offset",
+            ),
+            value_vars=block.value_columns,
+        )
+        col_labels = stacked_block.value_columns[-2 - original_col_index.nlevels : -2]
+        col_offset = stacked_block.value_columns[-2]  # disambiguator we created earlier
+        cell_values = stacked_block.value_columns[-1]
+        # Move the source column labels (plus the disambiguating offset) into the index
+        stacked_block = stacked_block.set_index(
+            [*col_labels, col_offset]
+        )  # col index is now row index
+        result = stacked_block.pivot(
+            columns=[offsets],
+            values=[cell_values],
+            columns_unique_values=tuple(range(original_row_count)),
+        )
+        # Drop the offsets from both axes before returning
+        return (
+            result.with_column_labels(original_row_index)
+            .order_by([ordering.ascending_over(result.index_columns[-1])])
+            .drop_levels([result.index_columns[-1]])
+        )
+
     def _create_stack_column(
         self, col_label: typing.Tuple, stack_labels: typing.Sequence[typing.Tuple]
     ):
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 11e592542c..bbb83662fe 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -311,6 +311,13 @@ def bqclient(self) -> bigframes.Session:
     def _session(self) -> bigframes.Session:
         return self._get_block().expr.session
 
+    @property
+    def T(self) -> DataFrame:
+        return DataFrame(self._get_block().transpose())
+
+    def transpose(self) -> DataFrame:
+        return self.T
+
     def __len__(self):
         rows, _ = self.shape
         return rows
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 4c598a682d..1a2bb828c3 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2465,6 +2465,24 @@ def test_df_describe(scalars_dfs):
     ).all()
 
 
+def test_df_transpose():
+    # Mix ints, floats and bools (plus a null) so the transpose must coerce to a common type
+    values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]]
+    # Test complex case of both axes being multi-indices with non-unique elements
+    columns = pd.Index(["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow"))
+    columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"])
+    index = pd.Index(["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow"))
+    rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"])
+
+    pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi)
+    bf_df = dataframe.DataFrame(values, index=rows_multi, columns=columns_multi)
+
+    pd_result = pd_df.T
+    bf_result = bf_df.T.to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     ("ordered"),
     [
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index c692bdbfec..0b1b213a1a 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -93,6 +93,82 @@ def values(self) -> np.ndarray:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    @property
+    def T(self) -> DataFrame:
+        """
+        The transpose of the DataFrame.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+            >>> df
+               col1  col2
+            0     1     3
+            1     2     4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            >>> df.T
+                  0  1
+            col1  1  2
+            col2  3  4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+        Returns:
+            DataFrame: The transposed DataFrame.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def transpose(self) -> DataFrame:
+        """
+        Transpose index and columns.
+
+        Reflect the DataFrame over its main diagonal by writing rows as columns
+        and vice-versa. The property :attr:`.T` is an accessor to the method
+        :meth:`transpose`.
+
+        **Examples:**
+            **Square DataFrame with homogeneous dtype**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> d1 = {'col1': [1, 2], 'col2': [3, 4]}
+            >>> df1 = bpd.DataFrame(data=d1)
+            >>> df1
+               col1  col2
+            0     1     3
+            1     2     4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            >>> df1_transposed = df1.T  # or df1.transpose()
+            >>> df1_transposed
+                  0  1
+            col1  1  2
+            col2  3  4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            When the dtype is homogeneous in the original DataFrame, we get a
+            transposed DataFrame with the same dtype:
+
+            >>> df1.dtypes
+            col1    Int64
+            col2    Int64
+            dtype: object
+            >>> df1_transposed.dtypes
+            0    Int64
+            1    Int64
+            dtype: object
+
+        Returns:
+            DataFrame: The transposed DataFrame.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def info(
         self,
         verbose: bool | None = None,

From aa43f87dac7e6a3795b345621814a86a0c47246f Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 18 Apr 2024 17:55:00 +0000
Subject: [PATCH 2/3] Clean up comments and use MAX_COLUMNS as the transpose row limit

---
 bigframes/core/blocks.py                            | 7 ++-----
 third_party/bigframes_vendored/pandas/core/frame.py | 2 ++
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 9249923249..dd5fe49c31 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -35,6 +35,7 @@
 import pyarrow as pa
 
 import bigframes._config.sampling_options as sampling_options
+import bigframes.constants
 import bigframes.constants as constants
 import bigframes.core as core
 import bigframes.core.expression as ex
@@ -1545,9 +1546,6 @@ def melt(
         """
         Unpivot columns to produce longer, narrower dataframe.
         Arguments correspond to pandas.melt arguments.
-        id_vars: passthrough variables that will be repeated N times
-        value_vars: variables that will be melted together
-        var_names:
         """
         # TODO: Implement col_level and ignore_index
         unpivot_col_id = guid.generate_guid()
@@ -1582,8 +1580,7 @@ def transpose(self) -> Block:
         original_col_index = self.column_labels
         original_row_index = self.index.to_pandas()
         original_row_count = len(original_row_index)
-        LIMIT = 9900
-        if original_row_count > LIMIT:
+        if original_row_count > bigframes.constants.MAX_COLUMNS:
             raise NotImplementedError(
                 f"Object has {original_row_count} rows and is too large to transpose."
             )
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 0b1b213a1a..8666199043 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -99,6 +99,7 @@ def T(self) -> DataFrame:
         The transpose of the DataFrame.
 
         **Examples:**
+
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
@@ -130,6 +131,7 @@ def transpose(self) -> DataFrame:
         :meth:`transpose`.
 
         **Examples:**
+
             **Square DataFrame with homogeneous dtype**
 
             >>> import bigframes.pandas as bpd

From 234da482bfbf861c5ba501317c54df1e7f0febc4 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 23 Apr 2024 00:06:59 +0000
Subject: [PATCH 3/3] Address PR comments: document dtype requirement, test coercion error

---
 bigframes/core/blocks.py                            | 2 +-
 tests/system/small/test_dataframe.py                | 5 +++++
 third_party/bigframes_vendored/pandas/core/frame.py | 4 ++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index dd5fe49c31..2a888125f8 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -1576,7 +1576,7 @@ def melt(
         )
 
     def transpose(self) -> Block:
-        """Transpose the block. Will fail if dtypes incompatible or too many rows"""
+        """Transpose the block. Fails if the dtypes can't be coerced to a common type or if there are too many rows."""
         original_col_index = self.column_labels
         original_row_index = self.index.to_pandas()
         original_row_count = len(original_row_index)
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 1a2bb828c3..f41a21add0 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2483,6 +2483,11 @@ def test_df_transpose():
     pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
 
 
+def test_df_transpose_error():
+    with pytest.raises(TypeError, match="Cannot coerce.*to a common type."):
+        dataframe.DataFrame([[1, "hello"], [2, "world"]]).transpose()
+
+
 @pytest.mark.parametrize(
     ("ordered"),
     [
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 8666199043..d639aa251e 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -98,6 +98,8 @@ def T(self) -> DataFrame:
         """
         The transpose of the DataFrame.
 
+        All columns must be the same dtype (numerics can be coerced to a common supertype).
+
         **Examples:**
 
             >>> import bigframes.pandas as bpd
@@ -130,6 +132,8 @@ def transpose(self) -> DataFrame:
         and vice-versa. The property :attr:`.T` is an accessor to the method
         :meth:`transpose`.
 
+        All columns must be the same dtype (numerics can be coerced to a common supertype).
+
         **Examples:**
 
             **Square DataFrame with homogeneous dtype**