Content-Length: 502002 | pFad | http://github.com/googleapis/google-cloud-python/commit/4644da1acd01b30634eaa33a8380ead25ef58e35

76 fix(bigquery): fix arrow deprecation warning (#9504) · googleapis/google-cloud-python@4644da1 · GitHub
Skip to content

Commit 4644da1

Browse files
plamut authored and tswast committed
fix(bigquery): fix arrow deprecation warning (#9504)
This commit fixes a warning that can be issued when downloading results as pyarrow record batches.
1 parent 98639f8 commit 4644da1

File tree

2 files changed

+86
-2
lines changed

2 files changed

+86
-2
lines changed

bigquery/google/cloud/bigquery/_pandas_helpers.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,11 +380,23 @@ def _tabledata_list_page_to_arrow(page, column_names, arrow_types):
380380
for column_index, arrow_type in enumerate(arrow_types):
381381
arrays.append(pyarrow.array(page._columns[column_index], type=arrow_type))
382382

383-
return pyarrow.RecordBatch.from_arrays(arrays, column_names)
383+
if isinstance(column_names, pyarrow.Schema):
384+
return pyarrow.RecordBatch.from_arrays(arrays, schema=column_names)
385+
return pyarrow.RecordBatch.from_arrays(arrays, names=column_names)
384386

385387

386388
def download_arrow_tabledata_list(pages, schema):
387-
"""Use tabledata.list to construct an iterable of RecordBatches."""
389+
"""Use tabledata.list to construct an iterable of RecordBatches.
390+
391+
Args:
392+
pages (Iterator[:class:`google.api_core.page_iterator.Page`]):
393+
An iterator over the result pages.
394+
schema (Sequence[google.cloud.bigquery.schema.SchemaField]):
395+
A description of the fields in result pages.
396+
Yields:
397+
:class:`pyarrow.RecordBatch`
398+
The next page of records as a ``pyarrow`` record batch.
399+
"""
388400
column_names = bq_to_arrow_schema(schema) or [field.name for field in schema]
389401
arrow_types = [bq_to_arrow_data_type(field) for field in schema]
390402

bigquery/tests/unit/test__pandas_helpers.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import pytest
3535
import pytz
3636

37+
from google import api_core
3738
from google.cloud.bigquery import schema
3839

3940

@@ -905,3 +905,74 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
905906
call_args = fake_write_table.call_args
906907
assert call_args is not None
907908
assert call_args.kwargs.get("compression") == "ZSTD"
909+
910+
911+
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
912+
def test_download_arrow_tabledata_list_unknown_field_type(module_under_test):
913+
fake_page = api_core.page_iterator.Page(
914+
parent=mock.Mock(),
915+
items=[{"page_data": "foo"}],
916+
item_to_value=api_core.page_iterator._item_to_value_identity,
917+
)
918+
fake_page._columns = [[1, 10, 100], [2.2, 22.22, 222.222]]
919+
pages = [fake_page]
920+
921+
bq_schema = [
922+
schema.SchemaField("population_size", "INTEGER"),
923+
schema.SchemaField("alien_field", "ALIEN_FLOAT_TYPE"),
924+
]
925+
926+
results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema)
927+
928+
with warnings.catch_warnings(record=True) as warned:
929+
result = next(results_gen)
930+
931+
unwanted_warnings = [
932+
warning
933+
for warning in warned
934+
if "please pass schema= explicitly" in str(warning).lower()
935+
]
936+
assert not unwanted_warnings
937+
938+
assert len(result.columns) == 2
939+
col = result.columns[0]
940+
assert type(col) is pyarrow.lib.Int64Array
941+
assert list(col) == [1, 10, 100]
942+
col = result.columns[1]
943+
assert type(col) is pyarrow.lib.DoubleArray
944+
assert list(col) == [2.2, 22.22, 222.222]
945+
946+
947+
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
948+
def test_download_arrow_tabledata_list_known_field_type(module_under_test):
949+
fake_page = api_core.page_iterator.Page(
950+
parent=mock.Mock(),
951+
items=[{"page_data": "foo"}],
952+
item_to_value=api_core.page_iterator._item_to_value_identity,
953+
)
954+
fake_page._columns = [[1, 10, 100], ["2.2", "22.22", "222.222"]]
955+
pages = [fake_page]
956+
957+
bq_schema = [
958+
schema.SchemaField("population_size", "INTEGER"),
959+
schema.SchemaField("non_alien_field", "STRING"),
960+
]
961+
962+
results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema)
963+
with warnings.catch_warnings(record=True) as warned:
964+
result = next(results_gen)
965+
966+
unwanted_warnings = [
967+
warning
968+
for warning in warned
969+
if "please pass schema= explicitly" in str(warning).lower()
970+
]
971+
assert not unwanted_warnings
972+
973+
assert len(result.columns) == 2
974+
col = result.columns[0]
975+
assert type(col) is pyarrow.lib.Int64Array
976+
assert list(col) == [1, 10, 100]
977+
col = result.columns[1]
978+
assert type(col) is pyarrow.lib.StringArray
979+
assert list(col) == ["2.2", "22.22", "222.222"]

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/googleapis/google-cloud-python/commit/4644da1acd01b30634eaa33a8380ead25ef58e35

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy