Skip to content

Commit 77d7173

Browse files
yokomotod and Linchin authored
fix: empty record dtypes (googleapis#2147)
* fix: empty record dtypes
* update pandas minimum version
* fix coverage
* fix test_pandas

---------

Co-authored-by: Lingqing Gan <lingqing.gan@gmail.com>
1 parent c526822 commit 77d7173

File tree

5 files changed

+23
-40
lines changed

5 files changed

+23
-40
lines changed

google/cloud/bigquery/table.py

Lines changed: 18 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -2648,31 +2648,25 @@ def to_dataframe(
26482648
if pyarrow.types.is_timestamp(col.type)
26492649
)
26502650

2651-
if len(record_batch) > 0:
2652-
df = record_batch.to_pandas(
2651+
df = record_batch.to_pandas(
2652+
date_as_object=date_as_object,
2653+
timestamp_as_object=timestamp_as_object,
2654+
integer_object_nulls=True,
2655+
types_mapper=_pandas_helpers.default_types_mapper(
26532656
date_as_object=date_as_object,
2654-
timestamp_as_object=timestamp_as_object,
2655-
integer_object_nulls=True,
2656-
types_mapper=_pandas_helpers.default_types_mapper(
2657-
date_as_object=date_as_object,
2658-
bool_dtype=bool_dtype,
2659-
int_dtype=int_dtype,
2660-
float_dtype=float_dtype,
2661-
string_dtype=string_dtype,
2662-
date_dtype=date_dtype,
2663-
datetime_dtype=datetime_dtype,
2664-
time_dtype=time_dtype,
2665-
timestamp_dtype=timestamp_dtype,
2666-
range_date_dtype=range_date_dtype,
2667-
range_datetime_dtype=range_datetime_dtype,
2668-
range_timestamp_dtype=range_timestamp_dtype,
2669-
),
2670-
)
2671-
else:
2672-
# Avoid "ValueError: need at least one array to concatenate" on
2673-
# older versions of pandas when converting empty RecordBatch to
2674-
# DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241
2675-
df = pandas.DataFrame([], columns=record_batch.schema.names)
2657+
bool_dtype=bool_dtype,
2658+
int_dtype=int_dtype,
2659+
float_dtype=float_dtype,
2660+
string_dtype=string_dtype,
2661+
date_dtype=date_dtype,
2662+
datetime_dtype=datetime_dtype,
2663+
time_dtype=time_dtype,
2664+
timestamp_dtype=timestamp_dtype,
2665+
range_date_dtype=range_date_dtype,
2666+
range_datetime_dtype=range_datetime_dtype,
2667+
range_timestamp_dtype=range_timestamp_dtype,
2668+
),
2669+
)
26762670

26772671
for column in dtypes:
26782672
df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ bqstorage = [
7272
"pyarrow >= 4.0.0",
7373
]
7474
pandas = [
75-
"pandas >= 1.1.4",
75+
"pandas >= 1.3.0",
7676
"pandas-gbq >= 0.26.1",
7777
"grpcio >= 1.47.0, < 2.0.0",
7878
"grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'",

testing/constraints-3.9.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ opentelemetry-api==1.1.0
2121
opentelemetry-instrumentation==0.20b0
2222
opentelemetry-sdk==1.1.0
2323
packaging==24.2.0
24-
pandas==1.1.4
24+
pandas==1.3.0
2525
pandas-gbq==0.26.1
2626
proto-plus==1.22.3
2727
protobuf==3.20.2

tests/system/test_pandas.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1222,12 +1222,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype(
12221222

12231223
# These pandas dtypes are handled by the custom dtypes.
12241224
assert df.dtypes["bool_col"].name == "boolean"
1225-
# Result is dependent upon which version of pandas is being used.
1226-
# Float64 was not introduced until pandas version 1.4.
1227-
if PANDAS_INSTALLED_VERSION >= "1.4":
1228-
assert df.dtypes["float64_col"].name == "Float64"
1229-
else:
1230-
assert df.dtypes["float64_col"].name == "string"
1225+
assert df.dtypes["float64_col"].name == "Float64"
12311226
assert df.dtypes["int64_col"].name == "Int64"
12321227
assert df.dtypes["string_col"].name == "string"
12331228

tests/unit/test_table.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4143,14 +4143,8 @@ def test_to_dataframe_w_dtypes_mapper(self):
41434143
)
41444144
self.assertEqual(df.name.dtype.name, "string")
41454145

4146-
# While pyproject.toml lists pandas 1.1 as the lowest supported version of
4147-
# pandas, the pip resolver is not able to resolve pandas 1.1 and numpy
4148-
if hasattr(pandas, "Float64Dtype"):
4149-
self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
4150-
self.assertEqual(df.miles.dtype.name, "Float64")
4151-
else:
4152-
self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"])
4153-
self.assertEqual(df.miles.dtype.name, "string")
4146+
self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
4147+
self.assertEqual(df.miles.dtype.name, "Float64")
41544148

41554149
if hasattr(pandas, "ArrowDtype"):
41564150
self.assertEqual(

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy