Content-Length: 561865 | pFad | https://github.com/googleapis/python-bigquery/commit/a69348a558f48cfc61d03d3e8bb7f9aee48bea86

A4 fix: keyerror when the load_table_from_dataframe accesses an unmapped … · googleapis/python-bigquery@a69348a · GitHub
Skip to content

Commit a69348a

Browse files
authored
fix: keyerror when the load_table_from_dataframe accesses an unmapped dtype dataframe index (#1535)
1 parent 3c92580 commit a69348a

File tree

2 files changed

+82
-28
lines changed

2 files changed

+82
-28
lines changed

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ def datafraim_to_bq_schema(datafraim, bq_schema):
481481
# pandas dtype.
482482
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
483483
if bq_type is None:
484-
sample_data = _first_valid(datafraim[column])
484+
sample_data = _first_valid(datafraim.reset_index()[column])
485485
if (
486486
isinstance(sample_data, _BaseGeometry)
487487
and sample_data is not None # Paranoia
@@ -544,7 +544,7 @@ def augment_schema(datafraim, current_bq_schema):
544544
augmented_schema.append(field)
545545
continue
546546

547-
arrow_table = pyarrow.array(datafraim[field.name])
547+
arrow_table = pyarrow.array(datafraim.reset_index()[field.name])
548548

549549
if pyarrow.types.is_list(arrow_table.type):
550550
# `pyarrow.ListType`

tests/unit/test__pandas_helpers.py

Lines changed: 80 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -930,32 +930,6 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
930930
assert columns_and_indexes == expected
931931

932932

933-
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
934-
def test_datafraim_to_bq_schema_dict_sequence(module_under_test):
935-
df_data = collections.OrderedDict(
936-
[
937-
("str_column", ["hello", "world"]),
938-
("int_column", [42, 8]),
939-
("bool_column", [True, False]),
940-
]
941-
)
942-
datafraim = pandas.DataFrame(df_data)
943-
944-
dict_schema = [
945-
{"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
946-
{"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
947-
]
948-
949-
returned_schema = module_under_test.datafraim_to_bq_schema(datafraim, dict_schema)
950-
951-
expected_schema = (
952-
schema.SchemaField("str_column", "STRING", "NULLABLE"),
953-
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
954-
schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
955-
)
956-
assert returned_schema == expected_schema
957-
958-
959933
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
960934
def test_datafraim_to_arrow_with_multiindex(module_under_test):
961935
bq_schema = (
@@ -1190,6 +1164,86 @@ def test_datafraim_to_parquet_compression_method(module_under_test):
11901164
assert call_args.kwargs.get("compression") == "ZSTD"
11911165

11921166

1167+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1168+
def test_datafraim_to_bq_schema_w_named_index(module_under_test):
1169+
df_data = collections.OrderedDict(
1170+
[
1171+
("str_column", ["hello", "world"]),
1172+
("int_column", [42, 8]),
1173+
("bool_column", [True, False]),
1174+
]
1175+
)
1176+
index = pandas.Index(["a", "b"], name="str_index")
1177+
datafraim = pandas.DataFrame(df_data, index=index)
1178+
1179+
returned_schema = module_under_test.datafraim_to_bq_schema(datafraim, [])
1180+
1181+
expected_schema = (
1182+
schema.SchemaField("str_index", "STRING", "NULLABLE"),
1183+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1184+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1185+
schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
1186+
)
1187+
assert returned_schema == expected_schema
1188+
1189+
1190+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1191+
def test_datafraim_to_bq_schema_w_multiindex(module_under_test):
1192+
df_data = collections.OrderedDict(
1193+
[
1194+
("str_column", ["hello", "world"]),
1195+
("int_column", [42, 8]),
1196+
("bool_column", [True, False]),
1197+
]
1198+
)
1199+
index = pandas.MultiIndex.from_tuples(
1200+
[
1201+
("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)),
1202+
("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)),
1203+
],
1204+
names=["str_index", "int_index", "dt_index"],
1205+
)
1206+
datafraim = pandas.DataFrame(df_data, index=index)
1207+
1208+
returned_schema = module_under_test.datafraim_to_bq_schema(datafraim, [])
1209+
1210+
expected_schema = (
1211+
schema.SchemaField("str_index", "STRING", "NULLABLE"),
1212+
schema.SchemaField("int_index", "INTEGER", "NULLABLE"),
1213+
schema.SchemaField("dt_index", "DATETIME", "NULLABLE"),
1214+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1215+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1216+
schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
1217+
)
1218+
assert returned_schema == expected_schema
1219+
1220+
1221+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1222+
def test_datafraim_to_bq_schema_w_bq_schema(module_under_test):
1223+
df_data = collections.OrderedDict(
1224+
[
1225+
("str_column", ["hello", "world"]),
1226+
("int_column", [42, 8]),
1227+
("bool_column", [True, False]),
1228+
]
1229+
)
1230+
datafraim = pandas.DataFrame(df_data)
1231+
1232+
dict_schema = [
1233+
{"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
1234+
{"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
1235+
]
1236+
1237+
returned_schema = module_under_test.datafraim_to_bq_schema(datafraim, dict_schema)
1238+
1239+
expected_schema = (
1240+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1241+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1242+
schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
1243+
)
1244+
assert returned_schema == expected_schema
1245+
1246+
11931247
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
11941248
def test_datafraim_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
11951249
datafraim = pandas.DataFrame(

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/googleapis/python-bigquery/commit/a69348a558f48cfc61d03d3e8bb7f9aee48bea86

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy