Skip to content

Commit

Permalink
make flare cleanup task safer, conver to manual task for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
nora-codecov committed Dec 28, 2024
1 parent b6fd340 commit 5ac629e
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 35 deletions.
19 changes: 9 additions & 10 deletions celery_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from shared.celery_config import (
BaseCeleryConfig,
brolly_stats_rollup_task_name,
flare_cleanup_task_name,
# flare_cleanup_task_name,
gh_app_webhook_check_task_name,
health_check_task_name,
profiling_finding_task_name,
Expand Down Expand Up @@ -89,19 +89,18 @@ def _beat_schedule():
},
"trial_expiration_cron": {
"task": trial_expiration_cron_task_name,
# 4 UTC is 12am EDT
"schedule": crontab(minute="0", hour="4"),
"kwargs": {
"cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
},
},
"flare_cleanup": {
"task": flare_cleanup_task_name,
"schedule": crontab(minute="0", hour="4"), # every day, 4am UTC (8pm PT)
"schedule": crontab(minute="0", hour="4"), # 4 UTC is 12am EDT
"kwargs": {
"cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
},
},
# "flare_cleanup": {
# "task": flare_cleanup_task_name,
# "schedule": crontab(minute="0", hour="5"), # every day, 5am UTC (10pm PDT)
# "kwargs": {
# "cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
# },
# },
}

if get_config("setup", "find_uncollected_profilings", "enabled", default=True):
Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
https://github.com/codecov/test-results-parser/archive/c840502d1b4dd7d05b2efc2c1328affaf2acd27c.tar.gz#egg=test-results-parser
https://github.com/codecov/shared/archive/2674ae99811767e63151590906691aed4c5ce1f9.tar.gz#egg=shared
https://github.com/codecov/shared/archive/96d0b0ce0ac9ef14b74ac97e6ca2c7659032887d.tar.gz#egg=shared
https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2f887936a98a668dda42.tar.gz#egg=timestring
asgiref>=3.7.2
analytics-python==1.3.0b1
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ sentry-sdk==2.13.0
# shared
setuptools==75.6.0
# via nodeenv
shared @ https://github.com/codecov/shared/archive/2674ae99811767e63151590906691aed4c5ce1f9.tar.gz#egg=shared
shared @ https://github.com/codecov/shared/archive/96d0b0ce0ac9ef14b74ac97e6ca2c7659032887d.tar.gz#egg=shared
# via -r requirements.in
six==1.16.0
# via
Expand Down
73 changes: 54 additions & 19 deletions tasks/flare_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,38 +25,73 @@ class FlareCleanupTask(CodecovCronTask, name=flare_cleanup_task_name):
def get_min_seconds_interval_between_executions(cls):
return 72000 # 20h

def run_cron_task(self, db_session, *args, **kwargs):
# for any Pull that is not OPEN, clear the flare field(s)
non_open_pulls = Pull.objects.exclude(state=PullStates.OPEN.value)
def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwargs):
# for any Pull that is not OPEN, clear the flare field(s), targeting older data
non_open_pulls = Pull.objects.exclude(state=PullStates.OPEN.value).order_by(
"updatestamp"
)

log.info("Starting FlareCleanupTask")

# clear in db
non_open_pulls_with_flare_in_db = non_open_pulls.filter(
_flare__isnull=False
).exclude(_flare={})
# single query, objs are not loaded into memory, does not call .save(), does not refresh updatestamp
n_updated = non_open_pulls_with_flare_in_db.update(_flare=None)
log.info(f"FlareCleanupTask cleared {n_updated} _flares")

# Process in batches using an offset
total_updated = 0
offset = 0
while offset < limit:
batch = non_open_pulls_with_flare_in_db.values_list("id", flat=True)[
offset : offset + batch_size
]
if not batch:
break
n_updated = non_open_pulls_with_flare_in_db.filter(id__in=batch).update(
_flare=None
)
total_updated += n_updated
offset += batch_size

log.info(f"FlareCleanupTask cleared {total_updated} database flares")

# clear in Archive
non_open_pulls_with_flare_in_archive = non_open_pulls.filter(
_flare_storage_path__isnull=False
).select_related("repository")
log.info(
f"FlareCleanupTask will clear {non_open_pulls_with_flare_in_archive.count()} Archive flares"
)
# single query, loads all pulls and repos in qset into memory, deletes file in Archive 1 by 1
for pull in non_open_pulls_with_flare_in_archive:
archive_service = ArchiveService(repository=pull.repository)
archive_service.delete_file(pull._flare_storage_path)

# single query, objs are not loaded into memory, does not call .save(), does not refresh updatestamp
n_updated = non_open_pulls_with_flare_in_archive.update(
_flare_storage_path=None
)

log.info(f"FlareCleanupTask cleared {n_updated} Archive flares")
# Process archive deletions in batches using an offset
total_updated = 0
offset = 0
while offset < limit:
batch = non_open_pulls_with_flare_in_archive.values_list("id", flat=True)[
offset : offset + batch_size
]
if not batch:
break
flare_paths_from_batch = Pull.objects.filter(id__in=batch).values_list(
"_flare_storage_path", flat=True
)
try:
archive_service = ArchiveService()
archive_service.delete_files(flare_paths_from_batch)
except Exception as e:

Check warning on line 78 in tasks/flare_cleanup.py

View check run for this annotation

Codecov Notifications / codecov/patch

tasks/flare_cleanup.py#L78

Added line #L78 was not covered by tests
# if something fails with deleting from archive, leave the _flare_storage_path on the pull object.
# only delete _flare_storage_path if the deletion from archive was successful.
log.error(f"FlareCleanupTask failed to delete archive files: {e}")
continue

Check warning on line 82 in tasks/flare_cleanup.py

View check run for this annotation

Codecov Notifications / codecov/patch

tasks/flare_cleanup.py#L81-L82

Added lines #L81 - L82 were not covered by tests

# Update the _flare_storage_path field for successfully processed pulls
n_updated = Pull.objects.filter(id__in=batch).update(
_flare_storage_path=None
)
total_updated += n_updated
offset += batch_size

log.info(f"FlareCleanupTask cleared {total_updated} Archive flares")

def manual_run(self, db_session=None, limit=1000, *args, **kwargs):
self.run_cron_task(db_session, limit=limit, *args, **kwargs)

Check warning on line 94 in tasks/flare_cleanup.py

View check run for this annotation

Codecov Notifications / codecov/patch

tasks/flare_cleanup.py#L94

Added line #L94 was not covered by tests


RegisteredFlareCleanupTask = celery_app.register_task(FlareCleanupTask())
Expand Down
6 changes: 2 additions & 4 deletions tasks/tests/unit/test_flare_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,7 @@ def test_successful_run(self, transactional_db, mocker):
mock_logs.assert_has_calls(
[
call("Starting FlareCleanupTask"),
call("FlareCleanupTask cleared 1 _flares"),
call("FlareCleanupTask will clear 1 Archive flares"),
call("FlareCleanupTask cleared 1 database flares"),
call("FlareCleanupTask cleared 1 Archive flares"),
]
)
Expand Down Expand Up @@ -120,8 +119,7 @@ def test_successful_run(self, transactional_db, mocker):
mock_logs.assert_has_calls(
[
call("Starting FlareCleanupTask"),
call("FlareCleanupTask cleared 0 _flares"),
call("FlareCleanupTask will clear 0 Archive flares"),
call("FlareCleanupTask cleared 0 database flares"),
call("FlareCleanupTask cleared 0 Archive flares"),
]
)

0 comments on commit 5ac629e

Please sign in to comment.