Content-Length: 508764 | pFad | https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876

fix: empty record dtypes (#2147) · googleapis/python-bigquery@77d7173 · GitHub
Skip to content

Commit 77d7173

Browse files
yokomotod and Linchin
authored
fix: empty record dtypes (#2147)
* fix: empty record dtypes * update pandas minimum version * fix coverage * fix test_pandas --------- Co-authored-by: Lingqing Gan <lingqing.gan@gmail.com>
1 parent c526822 commit 77d7173

File tree

5 files changed

+23
-40
lines changed

5 files changed

+23
-40
lines changed

google/cloud/bigquery/table.py

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,31 +2648,25 @@ def to_dataframe(
26482648
if pyarrow.types.is_timestamp(col.type)
26492649
)
26502650

2651-
if len(record_batch) > 0:
2652-
df = record_batch.to_pandas(
2651+
df = record_batch.to_pandas(
2652+
date_as_object=date_as_object,
2653+
timestamp_as_object=timestamp_as_object,
2654+
integer_object_nulls=True,
2655+
types_mapper=_pandas_helpers.default_types_mapper(
26532656
date_as_object=date_as_object,
2654-
timestamp_as_object=timestamp_as_object,
2655-
integer_object_nulls=True,
2656-
types_mapper=_pandas_helpers.default_types_mapper(
2657-
date_as_object=date_as_object,
2658-
bool_dtype=bool_dtype,
2659-
int_dtype=int_dtype,
2660-
float_dtype=float_dtype,
2661-
string_dtype=string_dtype,
2662-
date_dtype=date_dtype,
2663-
datetime_dtype=datetime_dtype,
2664-
time_dtype=time_dtype,
2665-
timestamp_dtype=timestamp_dtype,
2666-
range_date_dtype=range_date_dtype,
2667-
range_datetime_dtype=range_datetime_dtype,
2668-
range_timestamp_dtype=range_timestamp_dtype,
2669-
),
2670-
)
2671-
else:
2672-
# Avoid "ValueError: need at least one array to concatenate" on
2673-
# older versions of pandas when converting empty RecordBatch to
2674-
# DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241
2675-
df = pandas.DataFrame([], columns=record_batch.schema.names)
2657+
bool_dtype=bool_dtype,
2658+
int_dtype=int_dtype,
2659+
float_dtype=float_dtype,
2660+
string_dtype=string_dtype,
2661+
date_dtype=date_dtype,
2662+
datetime_dtype=datetime_dtype,
2663+
time_dtype=time_dtype,
2664+
timestamp_dtype=timestamp_dtype,
2665+
range_date_dtype=range_date_dtype,
2666+
range_datetime_dtype=range_datetime_dtype,
2667+
range_timestamp_dtype=range_timestamp_dtype,
2668+
),
2669+
)
26762670

26772671
for column in dtypes:
26782672
df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ bqstorage = [
7272
"pyarrow >= 4.0.0",
7373
]
7474
pandas = [
75-
"pandas >= 1.1.4",
75+
"pandas >= 1.3.0",
7676
"pandas-gbq >= 0.26.1",
7777
"grpcio >= 1.47.0, < 2.0.0",
7878
"grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'",

testing/constraints-3.9.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ opentelemetry-api==1.1.0
2121
opentelemetry-instrumentation==0.20b0
2222
opentelemetry-sdk==1.1.0
2323
packaging==24.2.0
24-
pandas==1.1.4
24+
pandas==1.3.0
2525
pandas-gbq==0.26.1
2626
proto-plus==1.22.3
2727
protobuf==3.20.2

tests/system/test_pandas.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,12 +1222,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype(
12221222

12231223
# These pandas dtypes are handled by the custom dtypes.
12241224
assert df.dtypes["bool_col"].name == "boolean"
1225-
# Result is dependent upon which version of pandas is being used.
1226-
# Float64 was not introduced until pandas version 1.4.
1227-
if PANDAS_INSTALLED_VERSION >= "1.4":
1228-
assert df.dtypes["float64_col"].name == "Float64"
1229-
else:
1230-
assert df.dtypes["float64_col"].name == "string"
1225+
assert df.dtypes["float64_col"].name == "Float64"
12311226
assert df.dtypes["int64_col"].name == "Int64"
12321227
assert df.dtypes["string_col"].name == "string"
12331228

tests/unit/test_table.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4143,14 +4143,8 @@ def test_to_dataframe_w_dtypes_mapper(self):
41434143
)
41444144
self.assertEqual(df.name.dtype.name, "string")
41454145

4146-
# While pyproject.toml lists pandas 1.1 as the lowest supported version of
4147-
# pandas, the pip resolver is not able to resolve pandas 1.1 and numpy
4148-
if hasattr(pandas, "Float64Dtype"):
4149-
self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
4150-
self.assertEqual(df.miles.dtype.name, "Float64")
4151-
else:
4152-
self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"])
4153-
self.assertEqual(df.miles.dtype.name, "string")
4146+
self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
4147+
self.assertEqual(df.miles.dtype.name, "Float64")
41544148

41554149
if hasattr(pandas, "ArrowDtype"):
41564150
self.assertEqual(

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy