Skip to content

Commit 64dfdbc

Browse files
Frances Hubis Thoma, copybara-github
Frances Hubis Thoma authored and copybara-github committed
feat: Add MultimodalDataset.to_bigframes() method to convert dataset to a BigFrames DataFrame object and inspect the dataset in the notebook.
PiperOrigin-RevId: 758308530
1 parent 0c4f4a6 commit 64dfdbc

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

google/cloud/aiplatform/preview/datasets.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,18 @@ def from_gemini_request_jsonl(
10931093
create_request_timeout=create_request_timeout,
10941094
)
10951095

1096+
def to_bigframes(self) -> "bigframes.pandas.DataFrame":  # type: ignore # noqa: F821
    """Converts a multimodal dataset to a BigFrames dataframe.

    This is the preferred method to inspect the multimodal dataset in a
    notebook.

    Returns:
        A BigFrames dataframe read from the dataset's BigQuery table.
    """
    bigframes = _try_import_bigframes()

    # NOTE(review): the accompanying system test reads `ds.bigquery_table`
    # as a plain attribute, while this method originally called it. The
    # definition is not visible here, so accept both forms rather than
    # risk "'str' object is not callable" -- confirm and simplify.
    table_uri = self.bigquery_table
    if callable(table_uri):
        table_uri = table_uri()

    # Remove the "bq://" scheme as an exact prefix. `str.lstrip("bq://")`
    # treats its argument as a *set of characters*, so it would also strip
    # the start of a project ID beginning with "b" or "q"
    # (e.g. "bq://bq-proj.ds.t" -> "-proj.ds.t").
    scheme = "bq://"
    if table_uri.startswith(scheme):
        table_uri = table_uri[len(scheme):]

    return bigframes.pandas.read_gbq_table(table_uri)
1107+
10961108
@classmethod
10971109
@base.optional_sync()
10981110
def _create_from_bigquery(

tests/system/aiplatform/test_multimodal_dataset.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,25 @@ def test_create_from_bigframes(self, shared_state):
171171
bigquery_client.delete_table(table_id, not_found_ok=True)
172172
ds.delete()
173173

174+
def test_export_to_bigframes(self, shared_state):
    """Checks that a dataset created from BigFrames exports back to BigFrames.

    Builds a BigFrames dataframe from `_TEST_DATASET`, creates a
    `MultimodalDataset` from it, exports it with `to_bigframes()`, and
    verifies the row count and column names of the exported dataframe.
    The backing BigQuery table and the dataset are deleted afterwards.
    """
    assert shared_state["bigquery_client"]
    bigquery_client = shared_state["bigquery_client"]

    bpd.options.bigquery.project = _TEST_PROJECT
    bpd.options.bigquery.location = _TEST_LOCATION

    # Bind the cleanup handles up front so that a failure during setup does
    # not turn into an UnboundLocalError inside `finally`, which would hide
    # the original exception.
    ds = None
    table_id = None
    try:
        bf_df_source = bpd.DataFrame(_TEST_DATASET)
        ds = datasets.MultimodalDataset.from_bigframes(dataframe=bf_df_source)
        # Resolve the table ID before exporting so the table can still be
        # cleaned up if `to_bigframes()` itself fails.
        table_id = _uri_to_table_id(ds.bigquery_table)
        bf_df_exported = ds.to_bigframes()

        assert len(bf_df_exported) == len(_TEST_DATASET)
        assert set(bf_df_exported.columns) == {"Question", "Answer"}
    finally:
        # Nested try/finally: the dataset is deleted even if dropping the
        # table raises.
        try:
            if table_id is not None:
                bigquery_client.delete_table(table_id, not_found_ok=True)
        finally:
            if ds is not None:
                ds.delete()
192+
174193
def test_assemble_dataset(self, shared_state):
175194
assert shared_state["bigquery_client"]
176195
assert shared_state["bigquery_test_table"]

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy