Skip to content

Commit 7092cfd

Browse files
Fix bad delete logic for dagruns (#32684)
Co-authored-by: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com>
1 parent bbd2902 commit 7092cfd

File tree

2 files changed

+55
-4
lines changed

2 files changed

+55
-4
lines changed

airflow/www/utils.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
from airflow.utils.code_utils import get_python_source
5151
from airflow.utils.helpers import alchemy_to_dict
5252
from airflow.utils.json import WebEncoder
53+
from airflow.utils.sqlalchemy import tuple_in_condition
5354
from airflow.utils.state import State, TaskInstanceState
5455
from airflow.www.forms import DateTimeWithTimezoneField
5556
from airflow.www.widgets import AirflowDateTimePickerWidget
@@ -60,6 +61,8 @@
6061

6162
from airflow.www.fab_security.sqla.manager import SecurityManager
6263

64+
TI = TaskInstance
65+
6366

6467
def datetime_to_string(value: DateTime | None) -> str | None:
6568
if value is None:
@@ -844,12 +847,17 @@ class DagRunCustomSQLAInterface(CustomSQLAInterface):
844847
"""
845848

846849
def delete(self, item: Model, raise_exception: bool = False) -> bool:
847-
self.session.execute(delete(TaskInstance).where(TaskInstance.run_id == item.run_id))
850+
self.session.execute(delete(TI).where(TI.dag_id == item.dag_id, TI.run_id == item.run_id))
848851
return super().delete(item, raise_exception=raise_exception)
849852

850853
def delete_all(self, items: list[Model]) -> bool:
851854
self.session.execute(
852-
delete(TaskInstance).where(TaskInstance.run_id.in_(item.run_id for item in items))
855+
delete(TI).where(
856+
tuple_in_condition(
857+
(TI.dag_id, TI.run_id),
858+
((x.dag_id, x.run_id) for x in items),
859+
)
860+
)
853861
)
854862
return super().delete_all(items)
855863

tests/www/test_utils.py

Lines changed: 45 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,17 +17,20 @@
1717
# under the License.
1818
from __future__ import annotations
1919

20+
import itertools
2021
import re
2122
from datetime import datetime
2223
from unittest.mock import Mock
2324
from urllib.parse import parse_qs
2425

26+
import pendulum
2527
from bs4 import BeautifulSoup
2628
from markupsafe import Markup
2729

30+
from airflow.models import DagRun
2831
from airflow.utils import json as utils_json
2932
from airflow.www import utils
30-
from airflow.www.utils import json_f, wrapped_markdown
33+
from airflow.www.utils import DagRunCustomSQLAInterface, json_f, wrapped_markdown
3134

3235

3336
class TestUtils:
@@ -156,7 +159,6 @@ def test_state_token(self):
156159
assert "<script>alert(1)</script>" not in html
157160

158161
def test_task_instance_link(self):
159-
160162
from airflow.www.app import cached_app
161163

162164
with cached_app(testing=True).test_request_context():
@@ -413,3 +415,44 @@ def test_wrapped_markdown_with_collapsible_section(self):
413415
</div>"""
414416
== rendered
415417
)
418+
419+
420+
def test_dag_run_custom_sqla_interface_delete_no_collateral_damage(dag_maker, session):
421+
interface = DagRunCustomSQLAInterface(obj=DagRun, session=session)
422+
dag_ids = (f"test_dag_{x}" for x in range(1, 4))
423+
dates = (pendulum.datetime(2023, 1, x) for x in range(1, 4))
424+
for dag_id, date in itertools.product(dag_ids, dates):
425+
with dag_maker(dag_id=dag_id) as dag:
426+
dag.create_dagrun(execution_date=date, state="running", run_type="scheduled")
427+
dag_runs = session.query(DagRun).all()
428+
assert len(dag_runs) == 9
429+
assert len(set(x.run_id for x in dag_runs)) == 3
430+
run_id_for_single_delete = "scheduled__2023-01-01T00:00:00+00:00"
431+
# we have 3 runs with this same run_id
432+
assert len(list(x for x in dag_runs if x.run_id == run_id_for_single_delete)) == 3
433+
# each is a different dag
434+
435+
# if we delete one, it shouldn't delete the others
436+
one_run = [x for x in dag_runs if x.run_id == run_id_for_single_delete][0]
437+
assert interface.delete(item=one_run) is True
438+
session.commit()
439+
dag_runs = session.query(DagRun).all()
440+
# we should have one fewer dag run now
441+
assert len(dag_runs) == 8
442+
443+
# now let's try multi delete
444+
run_id_for_multi_delete = "scheduled__2023-01-02T00:00:00+00:00"
445+
# verify we have 3
446+
runs_of_interest = [x for x in dag_runs if x.run_id == run_id_for_multi_delete]
447+
assert len(runs_of_interest) == 3
448+
# and that each is different dag
449+
assert len(set(x.dag_id for x in dag_runs)) == 3
450+
451+
to_delete = runs_of_interest[:2]
452+
# now try multi delete
453+
assert interface.delete_all(items=to_delete) is True
454+
session.commit()
455+
dag_runs = session.query(DagRun).all()
456+
assert len(dag_runs) == 6
457+
assert len(set(x.dag_id for x in dag_runs)) == 3
458+
assert len(set(x.run_id for x in dag_runs)) == 3

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy