Skip to content

Commit a69348a

Browse files
authored
fix: KeyError when load_table_from_dataframe accesses an unmapped dtype dataframe index (#1535)
1 parent 3c92580 commit a69348a

File tree

2 files changed

+82
-28
lines changed

2 files changed

+82
-28
lines changed

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
481481
# pandas dtype.
482482
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
483483
if bq_type is None:
484-
sample_data = _first_valid(dataframe[column])
484+
sample_data = _first_valid(dataframe.reset_index()[column])
485485
if (
486486
isinstance(sample_data, _BaseGeometry)
487487
and sample_data is not None # Paranoia
@@ -544,7 +544,7 @@ def augment_schema(dataframe, current_bq_schema):
544544
augmented_schema.append(field)
545545
continue
546546

547-
arrow_table = pyarrow.array(dataframe[field.name])
547+
arrow_table = pyarrow.array(dataframe.reset_index()[field.name])
548548

549549
if pyarrow.types.is_list(arrow_table.type):
550550
# `pyarrow.ListType`

tests/unit/test__pandas_helpers.py

Lines changed: 80 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -930,32 +930,6 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
930930
assert columns_and_indexes == expected
931931

932932

933-
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
934-
def test_dataframe_to_bq_schema_dict_sequence(module_under_test):
935-
df_data = collections.OrderedDict(
936-
[
937-
("str_column", ["hello", "world"]),
938-
("int_column", [42, 8]),
939-
("bool_column", [True, False]),
940-
]
941-
)
942-
dataframe = pandas.DataFrame(df_data)
943-
944-
dict_schema = [
945-
{"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
946-
{"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
947-
]
948-
949-
returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema)
950-
951-
expected_schema = (
952-
schema.SchemaField("str_column", "STRING", "NULLABLE"),
953-
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
954-
schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
955-
)
956-
assert returned_schema == expected_schema
957-
958-
959933
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
960934
def test_dataframe_to_arrow_with_multiindex(module_under_test):
961935
bq_schema = (
@@ -1190,6 +1164,86 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
11901164
assert call_args.kwargs.get("compression") == "ZSTD"
11911165

11921166

1167+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1168+
def test_dataframe_to_bq_schema_w_named_index(module_under_test):
1169+
df_data = collections.OrderedDict(
1170+
[
1171+
("str_column", ["hello", "world"]),
1172+
("int_column", [42, 8]),
1173+
("bool_column", [True, False]),
1174+
]
1175+
)
1176+
index = pandas.Index(["a", "b"], name="str_index")
1177+
dataframe = pandas.DataFrame(df_data, index=index)
1178+
1179+
returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, [])
1180+
1181+
expected_schema = (
1182+
schema.SchemaField("str_index", "STRING", "NULLABLE"),
1183+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1184+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1185+
schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
1186+
)
1187+
assert returned_schema == expected_schema
1188+
1189+
1190+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1191+
def test_dataframe_to_bq_schema_w_multiindex(module_under_test):
1192+
df_data = collections.OrderedDict(
1193+
[
1194+
("str_column", ["hello", "world"]),
1195+
("int_column", [42, 8]),
1196+
("bool_column", [True, False]),
1197+
]
1198+
)
1199+
index = pandas.MultiIndex.from_tuples(
1200+
[
1201+
("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)),
1202+
("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)),
1203+
],
1204+
names=["str_index", "int_index", "dt_index"],
1205+
)
1206+
dataframe = pandas.DataFrame(df_data, index=index)
1207+
1208+
returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, [])
1209+
1210+
expected_schema = (
1211+
schema.SchemaField("str_index", "STRING", "NULLABLE"),
1212+
schema.SchemaField("int_index", "INTEGER", "NULLABLE"),
1213+
schema.SchemaField("dt_index", "DATETIME", "NULLABLE"),
1214+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1215+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1216+
schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
1217+
)
1218+
assert returned_schema == expected_schema
1219+
1220+
1221+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1222+
def test_dataframe_to_bq_schema_w_bq_schema(module_under_test):
1223+
df_data = collections.OrderedDict(
1224+
[
1225+
("str_column", ["hello", "world"]),
1226+
("int_column", [42, 8]),
1227+
("bool_column", [True, False]),
1228+
]
1229+
)
1230+
dataframe = pandas.DataFrame(df_data)
1231+
1232+
dict_schema = [
1233+
{"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
1234+
{"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
1235+
]
1236+
1237+
returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema)
1238+
1239+
expected_schema = (
1240+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1241+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1242+
schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
1243+
)
1244+
assert returned_schema == expected_schema
1245+
1246+
11931247
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
11941248
def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
11951249
dataframe = pandas.DataFrame(

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy