googleapis · gcf-merge-on-green · Nov 3, 2023 · Nov 1, 2023 · Nov 2, 2023 · Nov 2, 2023
@@ -304,6 +304,9 @@ def __len__(self):
         rows, _ = self.shape
         return rows
 
+    def __iter__(self):
+        """Iterate over the entries of ``self.columns``."""
+        column_labels = self.columns
+        return iter(column_labels)
+
     def astype(
         self,
         dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
@@ -1477,12 +1480,27 @@ def isin(self, values) -> DataFrame:
                 f"isin(), you passed a [{type(values).__name__}]"
             )
 
+    def keys(self) -> pandas.Index:
+        """Return the column labels; this simply forwards ``self.columns``."""
+        column_labels = self.columns
+        return column_labels
+
     def items(self):
         """Yield ``(column label, Series)`` pairs, one per value column of the block."""
         labelled_ids = zip(self._block.value_columns, self._block.column_labels)
         for value_column_id, label in labelled_ids:
             column_block = self._block.select_column(value_column_id)
             yield label, bigframes.series.Series(column_block)
 
+    def iterrows(self) -> Iterable[tuple[typing.Any, pandas.Series]]:
+        """Yield the ``iterrows()`` items of each frame from ``to_pandas_batches()``."""
+        for batch in self.to_pandas_batches():
+            # Delegate to the batch's own row iterator; batches are consumed in order.
+            yield from batch.iterrows()
+
+    def itertuples(
+        self, index: bool = True, name: typing.Optional[str] = "Pandas"
+    ) -> Iterable[tuple[typing.Any, ...]]:
+        """Yield the ``itertuples()`` items of each frame from ``to_pandas_batches()``.
+
+        ``index`` and ``name`` are passed through unchanged to the
+        ``itertuples`` call made on every batch.
+        """
+        for batch in self.to_pandas_batches():
+            yield from batch.itertuples(index=index, name=name)
+
     def dropna(
         self,
         *,

@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import itertools
 import numbers
 import textwrap
 import typing
@@ -148,6 +149,11 @@ def _set_internal_query_job(self, query_job: bigquery.QueryJob):
     def __len__(self):
         """Return the first entry of ``self.shape``."""
         row_count = self.shape[0]
         return row_count
 
+    def __iter__(self) -> typing.Iterator:
+        """Chain the ``.index`` of each batch from ``self._block.to_pandas_batches()``."""
+        # NOTE(review): pandas' own ``Series.__iter__`` yields the values, not the
+        # index labels -- confirm that yielding index labels is intended here.
+        batch_indexes = (batch.index for batch in self._block.to_pandas_batches())
+        return itertools.chain.from_iterable(batch_indexes)
+
     def copy(self) -> Series:
         """Return a new ``Series`` constructed from this object's ``_block``."""
         block = self._block
         return Series(block)
 

@@ -803,6 +803,55 @@ def test_apply_series_scalar_callable(
     pandas.testing.assert_series_equal(bf_result, pd_result)
 
 
+def test_df_keys(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    """``keys()`` on both fixtures must return equal indexes."""
+    bf_keys = scalars_df_index.keys()
+    pd_keys = scalars_pandas_df_index.keys()
+    pandas.testing.assert_index_equal(bf_keys, pd_keys)
+
+
+def test_df_iter(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    """Iterating both fixtures must yield the same items in the same order."""
+    # Compare whole lists rather than zipping: zip() stops at the shorter input,
+    # so a missing or extra item would otherwise go unnoticed.
+    assert list(scalars_df_index) == list(scalars_pandas_df_index)
+
+
+def test_iterrows(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    """``iterrows()`` on both fixtures must yield equal ``(index, Series)`` pairs."""
+    bf_rows = list(scalars_df_index.iterrows())
+    pd_rows = list(scalars_pandas_df_index.iterrows())
+    # zip() below stops at the shorter input, so check the row counts first;
+    # otherwise an iterator that yields too few rows would pass unnoticed.
+    assert len(bf_rows) == len(pd_rows)
+    for (bf_index, bf_series), (pd_index, pd_series) in zip(bf_rows, pd_rows):
+        assert bf_index == pd_index
+        pandas.testing.assert_series_equal(bf_series, pd_series)
+
+
+@pytest.mark.parametrize(
+    (
+        "index",
+        "name",
+    ),
+    [
+        (
+            True,
+            "my_df",
+        ),
+        (False, None),
+    ],
+)
+def test_itertuples(scalars_df_index, index, name):
+    """``itertuples()`` must yield the same tuples as ``to_pandas().itertuples()``."""
+    # Numeric has slightly different representation as a result of conversions.
+    # The expected tuples are taken from to_pandas() of the same frame.
+    bf_tuples = list(scalars_df_index.itertuples(index, name))
+    pd_tuples = list(scalars_df_index.to_pandas().itertuples(index, name))
+    # zip() below stops at the shorter input, so check the tuple counts first;
+    # otherwise an iterator that yields too few tuples would pass unnoticed.
+    assert len(bf_tuples) == len(pd_tuples)
+    for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples):
+        assert bf_tuple == pd_tuple
+
+
 def test_df_isin_list(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     values = ["Hello, World!", 55555, 2.51, pd.NA, True]

@@ -975,6 +975,85 @@ def isin(self, values):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def keys(self):
+        """
+        Get the 'info axis'.
+
+        This is the index for a Series and the columns for a DataFrame.
+
+        Returns:
+            Index: Info axis.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ...     })
+            >>> df.keys()
+            Index(['A', 'B'], dtype='object')
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def iterrows(self):
+        """
+        Iterate over DataFrame rows as (index, Series) pairs.
+
+        Yields:
+            tuple:
+                An ``(index, data)`` pair, where ``data`` contains the row
+                values as a Series.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ...     })
+            >>> index, row = next(df.iterrows())
+            >>> index
+            0
+            >>> row
+            A    1
+            B    4
+            Name: 0, dtype: object
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def itertuples(self, index: bool = True, name: str | None = "Pandas"):
+        """
+        Iterate over DataFrame rows as namedtuples.
+
+        Args:
+            index (bool, default True):
+                If True, return the index as the first element of the tuple.
+            name (str or None, default "Pandas"):
+                The name of the returned namedtuples or None to return regular
+                tuples.
+
+        Returns:
+            iterator:
+                An object to iterate over namedtuples for each row in the
+                DataFrame with the first field possibly being the index and
+                following fields being the column values.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ...     })
+            >>> next(df.itertuples(name="Pair"))
+            Pair(Index=0, A=1, B=4)
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def items(self):
         """
         Iterate over (column name, Series) pairs.

@@ -1,7 +1,7 @@
 # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
 from __future__ import annotations
 
-from typing import Literal, Optional
+from typing import Iterator, Literal, Optional
 
 from bigframes import constants
 from third_party.bigframes_vendored.pandas.core import indexing
@@ -35,6 +35,35 @@ def size(self) -> int:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def __iter__(self) -> Iterator:
+        """
+        Iterate over info axis.
+
+        Returns:
+            iterator: Info axis as iterator.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     'A': [1, 2, 3],
+            ...     'B': [4, 5, 6],
+            ... })
+            >>> for x in df:
+            ...     print(x)
+            A
+            B
+
+            >>> series = bpd.Series(["a", "b", "c"], index=[10, 20, 30])
+            >>> for x in series:
+            ...     print(x)
+            10
+            20
+            30
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     # -------------------------------------------------------------------------
     # Unary Methods