Content-Length: 902230 | pFad | http://github.com/googleapis/python-bigquery/commit/dac879c993795ee772b0b5aefa1ed327b23f41e1

4F fix: dry run queries with DB API cursor · googleapis/python-bigquery@dac879c · GitHub
Skip to content

Commit dac879c

Browse files
committed
fix: dry run queries with DB API cursor
1 parent 3869e34 commit dac879c

File tree

3 files changed

+167
-10
lines changed

3 files changed

+167
-10
lines changed

google/cloud/bigquery/dbapi/cursor.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""Cursor for the Google BigQuery DB-API."""
1616

1717
import collections
18+
import copy
1819

1920
try:
2021
from collections import abc as collections_abc
@@ -26,6 +27,7 @@
2627
import six
2728

2829
from google.cloud.bigquery import job
30+
from google.cloud.bigquery import table
2931
from google.cloud.bigquery.dbapi import _helpers
3032
from google.cloud.bigquery.dbapi import exceptions
3133
import google.cloud.exceptions
@@ -169,12 +171,24 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
169171
formatted_operation = _format_operation(operation, parameters=parameters)
170172
query_parameters = _helpers.to_query_parameters(parameters)
171173

172-
config = job_config or job.QueryJobConfig(use_legacy_sql=False)
174+
if client._default_query_job_config:
175+
if job_config:
176+
config = job_config._fill_from_default(client._default_query_job_config)
177+
else:
178+
config = copy.deepcopy(client._default_query_job_config)
179+
else:
180+
config = job_config or job.QueryJobConfig(use_legacy_sql=False)
181+
173182
config.query_parameters = query_parameters
174183
self._query_job = client.query(
175184
formatted_operation, job_config=config, job_id=job_id
176185
)
177186

187+
if self._query_job.dry_run:
188+
self.rowcount = 1
189+
self.description = None
190+
return
191+
178192
# Wait for the query to finish.
179193
try:
180194
self._query_job.result()
@@ -207,6 +221,12 @@ def _try_fetch(self, size=None):
207221
"No query results: execute() must be called before fetch."
208222
)
209223

224+
if self._query_job.dry_run:
225+
estimated_bytes = self._query_job.total_bytes_processed
226+
row = table.Row((estimated_bytes,), {"estimated_bytes": 0})
227+
self._query_data = iter([row])
228+
return
229+
210230
is_dml = (
211231
self._query_job.statement_type
212232
and self._query_job.statement_type.upper() != "SELECT"
@@ -290,6 +310,11 @@ def _bqstorage_fetch(self, bqstorage_client):
290310
def fetchone(self):
291311
"""Fetch a single row from the results of the last ``execute*()`` call.
292312
313+
.. note::
314+
If a dry run query was executed, a row with a single value is
315+
returned representing the estimated number of bytes that would be
316+
processed by the query.
317+
293318
Returns:
294319
Tuple:
295320
A tuple representing a row or ``None`` if no more data is
@@ -307,6 +332,11 @@ def fetchone(self):
307332
def fetchmany(self, size=None):
308333
"""Fetch multiple results from the last ``execute*()`` call.
309334
335+
.. note::
336+
If a dry run query was executed, a row with a single value is
337+
returned representing the estimated number of bytes that would be
338+
processed by the query.
339+
310340
.. note::
311341
The size parameter is not used for the request/response size.
312342
Set the ``arraysize`` attribute before calling ``execute()`` to
@@ -343,6 +373,11 @@ def fetchmany(self, size=None):
343373
def fetchall(self):
344374
"""Fetch all remaining results from the last ``execute*()`` call.
345375
376+
.. note::
377+
If a dry run query was executed, a row with a single value is
378+
returned representing the estimated number of bytes that would be
379+
processed by the query.
380+
346381
Returns:
347382
List[Tuple]: A list of all the rows in the results.
348383

tests/system.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,24 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):
17821782
]
17831783
self.assertEqual(fetched_data, expected_data)
17841784

1785+
def test_dbapi_dry_run_query(self):
1786+
from google.cloud.bigquery.job import QueryJobConfig
1787+
1788+
query = """
1789+
SELECT country_name
1790+
FROM `bigquery-public-data.utility_us.country_code_iso`
1791+
WHERE country_name LIKE 'U%'
1792+
"""
1793+
1794+
Config.CURSOR.execute(query, job_config=QueryJobConfig(dry_run=True))
1795+
self.assertEqual(Config.CURSOR.rowcount, 1, "expected a single row")
1796+
1797+
rows = Config.CURSOR.fetchall()
1798+
1799+
row_tuples = [r.values() for r in rows]
1800+
expected = [(3473,)]
1801+
self.assertEqual(row_tuples, expected)
1802+
17851803
def _load_table_for_dml(self, rows, dataset_id, table_id):
17861804
from google.cloud._testing import _NamedTemporaryFile
17871805
from google.cloud.bigquery.job import CreateDisposition

tests/unit/test_dbapi_cursor.py

Lines changed: 113 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,15 @@ def _get_target_class():
3636
def _make_one(self, *args, **kw):
3737
return self._get_target_class()(*args, **kw)
3838

39-
def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None):
39+
def _mock_client(
40+
self,
41+
rows=None,
42+
schema=None,
43+
num_dml_affected_rows=None,
44+
default_query_job_config=None,
45+
dry_run_job=False,
46+
total_bytes_processed=0,
47+
):
4048
from google.cloud.bigquery import client
4149

4250
if rows is None:
@@ -49,8 +57,12 @@ def _mock_client(self, rows=None, schema=None, num_dml_affected_rows=None):
4957
total_rows=total_rows,
5058
schema=schema,
5159
num_dml_affected_rows=num_dml_affected_rows,
60+
dry_run=dry_run_job,
61+
total_bytes_processed=total_bytes_processed,
5262
)
5363
mock_client.list_rows.return_value = rows
64+
mock_client._default_query_job_config = default_query_job_config
65+
5466
return mock_client
5567

5668
def _mock_bqstorage_client(self, rows=None, stream_count=0):
@@ -76,18 +88,31 @@ def _mock_bqstorage_client(self, rows=None, stream_count=0):
7688

7789
return mock_client
7890

79-
def _mock_job(self, total_rows=0, schema=None, num_dml_affected_rows=None):
91+
def _mock_job(
92+
self,
93+
total_rows=0,
94+
schema=None,
95+
num_dml_affected_rows=None,
96+
dry_run=False,
97+
total_bytes_processed=0,
98+
):
8099
from google.cloud.bigquery import job
81100

82101
mock_job = mock.create_autospec(job.QueryJob)
83102
mock_job.error_result = None
84103
mock_job.state = "DONE"
85-
mock_job.result.return_value = mock_job
86-
mock_job._query_results = self._mock_results(
87-
total_rows=total_rows,
88-
schema=schema,
89-
num_dml_affected_rows=num_dml_affected_rows,
90-
)
104+
mock_job.dry_run = dry_run
105+
106+
if dry_run:
107+
mock_job.result.side_effect = exceptions.NotFound
108+
mock_job.total_bytes_processed = total_bytes_processed
109+
else:
110+
mock_job.result.return_value = mock_job
111+
mock_job._query_results = self._mock_results(
112+
total_rows=total_rows,
113+
schema=schema,
114+
num_dml_affected_rows=num_dml_affected_rows,
115+
)
91116

92117
if num_dml_affected_rows is None:
93118
mock_job.statement_type = None # API sends back None for SELECT
@@ -373,7 +398,27 @@ def test_execute_custom_job_id(self):
373398
self.assertEqual(args[0], "SELECT 1;")
374399
self.assertEqual(kwargs["job_id"], "foo")
375400

376-
def test_execute_custom_job_config(self):
401+
def test_execute_w_default_config(self):
402+
from google.cloud.bigquery.dbapi import connect
403+
from google.cloud.bigquery import job
404+
405+
default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True)
406+
client = self._mock_client(
407+
rows=[], num_dml_affected_rows=0, default_query_job_config=default_config
408+
)
409+
connection = connect(client)
410+
cursor = connection.cursor()
411+
412+
cursor.execute("SELECT 1;", job_id="foo")
413+
414+
_, kwargs = client.query.call_args
415+
used_config = kwargs["job_config"]
416+
expected_config = job.QueryJobConfig(
417+
use_legacy_sql=False, flatten_results=True, query_parameters=[]
418+
)
419+
self.assertEqual(used_config._properties, expected_config._properties)
420+
421+
def test_execute_custom_job_config_wo_default_config(self):
377422
from google.cloud.bigquery.dbapi import connect
378423
from google.cloud.bigquery import job
379424

@@ -387,6 +432,29 @@ def test_execute_custom_job_config(self):
387432
self.assertEqual(kwargs["job_id"], "foo")
388433
self.assertEqual(kwargs["job_config"], config)
389434

435+
def test_execute_custom_job_config_w_default_config(self):
436+
from google.cloud.bigquery.dbapi import connect
437+
from google.cloud.bigquery import job
438+
439+
default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True)
440+
client = self._mock_client(
441+
rows=[], num_dml_affected_rows=0, default_query_job_config=default_config
442+
)
443+
connection = connect(client)
444+
cursor = connection.cursor()
445+
config = job.QueryJobConfig(use_legacy_sql=True)
446+
447+
cursor.execute("SELECT 1;", job_id="foo", job_config=config)
448+
449+
_, kwargs = client.query.call_args
450+
used_config = kwargs["job_config"]
451+
expected_config = job.QueryJobConfig(
452+
use_legacy_sql=True, # the config passed to execute() prevails
453+
flatten_results=True, # from the default
454+
query_parameters=[],
455+
)
456+
self.assertEqual(used_config._properties, expected_config._properties)
457+
390458
def test_execute_w_dml(self):
391459
from google.cloud.bigquery.dbapi import connect
392460

@@ -442,6 +510,40 @@ def test_execute_w_query(self):
442510
row = cursor.fetchone()
443511
self.assertIsNone(row)
444512

513+
def test_execute_w_query_dry_run(self):
514+
from google.cloud.bigquery.job import QueryJobConfig
515+
from google.cloud.bigquery.schema import SchemaField
516+
from google.cloud.bigquery import dbapi
517+
518+
connection = dbapi.connect(
519+
self._mock_client(
520+
rows=[("hello", "world", 1), ("howdy", "y'all", 2)],
521+
schema=[
522+
SchemaField("a", "STRING", mode="NULLABLE"),
523+
SchemaField("b", "STRING", mode="REQUIRED"),
524+
SchemaField("c", "INTEGER", mode="NULLABLE"),
525+
],
526+
dry_run_job=True,
527+
total_bytes_processed=12345,
528+
)
529+
)
530+
cursor = connection.cursor()
531+
cursor.execute(
532+
"SELECT a, b, c FROM hello_world WHERE d > 3;",
533+
job_config=QueryJobConfig(dry_run=True),
534+
)
535+
536+
self.assertIsNone(cursor.description)
537+
self.assertEqual(cursor.rowcount, 1)
538+
539+
rows = cursor.fetchall()
540+
541+
# We expect a single row with one column - the estimated numbe of bytes
542+
# that will be processed by the query.
543+
self.assertEqual(len(rows), 1)
544+
self.assertEqual(len(rows[0]), 1)
545+
self.assertEqual(rows[0][0], 12345)
546+
445547
def test_execute_raises_if_result_raises(self):
446548
import google.cloud.exceptions
447549

@@ -451,8 +553,10 @@ def test_execute_raises_if_result_raises(self):
451553
from google.cloud.bigquery.dbapi import exceptions
452554

453555
job = mock.create_autospec(job.QueryJob)
556+
job.dry_run = None
454557
job.result.side_effect = google.cloud.exceptions.GoogleCloudError("")
455558
client = mock.create_autospec(client.Client)
559+
client._default_query_job_config = None
456560
client.query.return_value = job
457561
connection = connect(client)
458562
cursor = connection.cursor()

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/googleapis/python-bigquery/commit/dac879c993795ee772b0b5aefa1ed327b23f41e1

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy