diff --git a/data_registry/admin.py b/data_registry/admin.py index 0c2e02c..e60091d 100644 --- a/data_registry/admin.py +++ b/data_registry/admin.py @@ -2,7 +2,7 @@ from django.conf import settings from django.contrib import admin, messages -from django.db.models import Exists, OuterRef, Q +from django.db.models import Q from django.urls import NoReverseMatch, reverse from django.utils import timezone from django.utils.html import escape @@ -206,22 +206,16 @@ def get_form(self, request, obj=None, **kwargs): return form -class FailedFilter(admin.SimpleListFilter): - title = _("failed") - parameter_name = "failed" +class UnsuccessfulFilter(admin.SimpleListFilter): + title = _("unsuccessful") + parameter_name = "unsuccessful" def lookups(self, request, model_admin): return (("1", _("Yes")),) def queryset(self, request, queryset): if self.value() == "1": - # https://docs.djangoproject.com/en/4.2/ref/models/expressions/#some-examples - failed_tasks = Task.objects.filter( - job=OuterRef("pk"), - status=Task.Status.COMPLETED, - result=Task.Result.FAILED, - ) - return queryset.filter(Exists(failed_tasks)) + return queryset.unsuccessful() return None @@ -248,7 +242,7 @@ class Media: list_display = ["__str__", "country", "collection", "status", "last_task", "active", "archived", "keep_all_data"] # "active" is read-only and uneditable, because at most one job must be set as active for a given collection. list_editable = ["status", "keep_all_data"] - list_filter = ["status", ("active_collection", admin.EmptyFieldListFilter), "archived", FailedFilter] + list_filter = ["status", ("active_collection", admin.EmptyFieldListFilter), "archived", UnsuccessfulFilter] fieldsets = ( ( @@ -339,7 +333,7 @@ def country(self, obj): @admin.display(description="Active", boolean=True) def active(self, obj): - return obj.id == obj.collection.active_job_id + return obj.pk == obj.collection.active_job_id @admin.display(description="Last completed task") def last_task(self, obj): diff --git a/data_registry/forms.py b/data_registry/forms.py index 5098700..719fca6 100644 --- a/data_registry/forms.py +++ b/data_registry/forms.py @@ -71,7 +71,7 @@ def __init__(self, *args, request=None, **kwargs): # It's not obvious how to use limit_choices_to to filter jobs by collection. # https://docs.djangoproject.com/en/4.2/ref/models/fields/#django.db.models.ForeignKey.limit_choices_to self.fields["active_job"].queryset = ( - models.Job.objects.filter(collection=self.instance).complete().order_by(F("id").desc()) + models.Job.objects.filter(collection=self.instance).complete().order_by(F("pk").desc()) ) # Populate choices in the form, not the model, for easier migration between icon sets. diff --git a/data_registry/locale/en/LC_MESSAGES/django.po b/data_registry/locale/en/LC_MESSAGES/django.po index 2a2d5e7..83f55d8 100644 --- a/data_registry/locale/en/LC_MESSAGES/django.po +++ b/data_registry/locale/en/LC_MESSAGES/django.po @@ -53,7 +53,7 @@ msgstr "Past year" msgid "More than a year ago" msgstr "More than a year ago" -#: data_registry/admin.py:134 data_registry/admin.py:254 +#: data_registry/admin.py:134 data_registry/admin.py:248 msgid "Management" msgstr "Management" @@ -71,8 +71,8 @@ msgid "Details" msgstr "Details" #: data_registry/admin.py:209 -msgid "failed" -msgstr "failed" +msgid "unsuccessful" +msgstr "unsuccessful" #: data_registry/admin.py:281 msgid "Data availability" diff --git a/data_registry/locale/en_US/LC_MESSAGES/django.po b/data_registry/locale/en_US/LC_MESSAGES/django.po index 7f78ec7..3f8697e 100644 --- a/data_registry/locale/en_US/LC_MESSAGES/django.po +++ b/data_registry/locale/en_US/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2024-11-07 04:18+0000\n" +"POT-Creation-Date: 2024-11-07 11:21-0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -28,7 +28,7 @@ msgid "incomplete" msgstr "" #: data_registry/admin.py:47 data_registry/admin.py:68 -#: data_registry/admin.py:213 +#: data_registry/admin.py:214 msgid "Yes" msgstr "" @@ -44,7 +44,7 @@ msgstr "" msgid "More than a year ago" msgstr "" -#: data_registry/admin.py:134 data_registry/admin.py:254 +#: data_registry/admin.py:134 data_registry/admin.py:249 msgid "Management" msgstr "" @@ -52,89 +52,89 @@ msgstr "" msgid "Basics" msgstr "" -#: data_registry/admin.py:163 data_registry/admin.py:269 -#: data_registry/templates/detail.html:128 +#: data_registry/admin.py:163 data_registry/admin.py:264 +#: data_registry/templates/detail.html:130 msgid "Overview" msgstr "" -#: data_registry/admin.py:177 +#: data_registry/admin.py:178 msgid "Details" msgstr "" -#: data_registry/admin.py:209 -msgid "failed" +#: data_registry/admin.py:210 +msgid "unsuccessful" msgstr "" -#: data_registry/admin.py:281 +#: data_registry/admin.py:276 msgid "Data availability" msgstr "" -#: data_registry/models.py:136 +#: data_registry/models.py:144 msgid "Africa and Middle East" msgstr "" -#: data_registry/models.py:138 +#: data_registry/models.py:146 msgid "Asia" msgstr "" -#: data_registry/models.py:140 +#: data_registry/models.py:148 msgid "Eastern Europe & Central Asia" msgstr "" -#: data_registry/models.py:142 +#: data_registry/models.py:150 msgid "Europe" msgstr "" -#: data_registry/models.py:144 +#: data_registry/models.py:152 msgid "Latin America & Caribbean" msgstr "" -#: data_registry/models.py:146 +#: data_registry/models.py:154 msgid "North America" msgstr "" -#: data_registry/models.py:148 +#: data_registry/models.py:156 msgid "Oceania" msgstr "" -#: data_registry/models.py:152 data_registry/models.py:172 +#: data_registry/models.py:160 data_registry/models.py:180 msgid "Monthly" msgstr "" -#: data_registry/models.py:154 data_registry/models.py:176 +#: data_registry/models.py:162 data_registry/models.py:184 msgid "Every 6 months" msgstr "" -#: data_registry/models.py:156 data_registry/models.py:178 +#: data_registry/models.py:164 data_registry/models.py:186 msgid "Annually" msgstr "" -#: data_registry/models.py:158 +#: data_registry/models.py:166 msgid "This dataset is no longer updated by the publisher" msgstr "" -#: data_registry/models.py:164 +#: data_registry/models.py:172 msgid "Real time" msgstr "" -#: data_registry/models.py:166 +#: data_registry/models.py:174 msgid "Hourly" msgstr "" -#: data_registry/models.py:168 +#: data_registry/models.py:176 msgid "Daily" msgstr "" -#: data_registry/models.py:170 +#: data_registry/models.py:178 msgid "Weekly" msgstr "" -#: data_registry/models.py:174 +#: data_registry/models.py:182 msgid "Every 3 months" msgstr "" #: data_registry/templates/detail.html:8 data_registry/templates/index.html:21 -#: data_registry/templates/search.html:6 data_registry/views.py:209 +#: data_registry/templates/search.html:6 data_registry/views.py:208 msgid "OCP Data Registry" msgstr "" @@ -145,65 +145,65 @@ msgid "" "source's coverage, features and quality issues." msgstr "" -#: data_registry/templates/detail.html:74 +#: data_registry/templates/detail.html:76 msgid "BACK TO DATASETS SEARCH" msgstr "" -#: data_registry/templates/detail.html:102 +#: data_registry/templates/detail.html:104 #: data_registry/templates/search.html:170 msgid "Available formats:" msgstr "" -#: data_registry/templates/detail.html:117 +#: data_registry/templates/detail.html:119 msgid "ACCESS" msgstr "" -#: data_registry/templates/detail.html:132 +#: data_registry/templates/detail.html:134 #: data_registry/templates/search.html:150 msgid "Data date range:" msgstr "" -#: data_registry/templates/detail.html:138 +#: data_registry/templates/detail.html:140 #: data_registry/templates/search.html:157 msgid "Update frequency:" msgstr "" -#: data_registry/templates/detail.html:140 +#: data_registry/templates/detail.html:142 msgid "Main language:" msgstr "" -#: data_registry/templates/detail.html:142 +#: data_registry/templates/detail.html:144 msgid "OCID prefix:" msgstr "" -#: data_registry/templates/detail.html:144 +#: data_registry/templates/detail.html:146 msgid "License:" msgstr "" -#: data_registry/templates/detail.html:155 +#: data_registry/templates/detail.html:157 msgid "Publication policy:" msgstr "" -#: data_registry/templates/detail.html:163 +#: data_registry/templates/detail.html:160 #: data_registry/templates/search.html:159 msgid "Last retrieved:" msgstr "" -#: data_registry/templates/detail.html:167 +#: data_registry/templates/detail.html:164 #: data_registry/templates/search.html:163 #, python-format msgid "retrieved %(retrieval_frequency)s" msgstr "" -#: data_registry/templates/detail.html:170 +#: data_registry/templates/detail.html:167 msgid "Retrieved from:" msgstr "" -#: data_registry/templates/detail.html:177 +#: data_registry/templates/detail.html:174 msgid "Data available" msgstr "" -#: data_registry/templates/detail.html:181 +#: data_registry/templates/detail.html:178 msgid "" "An individual contracting process has several different stages: tendering, " "awarding, contracting and implementation. You can dive into the details of " @@ -211,132 +211,132 @@ msgid "" "org/latest/en/primer/how/\" target=\"_blank\">tutorial." msgstr "" -#: data_registry/templates/detail.html:188 +#: data_registry/templates/detail.html:185 msgid "Parties" msgstr "" -#: data_registry/templates/detail.html:190 +#: data_registry/templates/detail.html:187 msgid "Count of parties:" msgstr "" -#: data_registry/templates/detail.html:195 +#: data_registry/templates/detail.html:192 msgid "Plannings" msgstr "" -#: data_registry/templates/detail.html:197 +#: data_registry/templates/detail.html:194 msgid "Count of planning activities:" msgstr "" -#: data_registry/templates/detail.html:202 +#: data_registry/templates/detail.html:199 msgid "Tenders" msgstr "" -#: data_registry/templates/detail.html:204 +#: data_registry/templates/detail.html:201 msgid "Count of tenders:" msgstr "" -#: data_registry/templates/detail.html:206 +#: data_registry/templates/detail.html:203 msgid "Count of tenderers:" msgstr "" -#: data_registry/templates/detail.html:208 +#: data_registry/templates/detail.html:205 msgid "Count of tender items:" msgstr "" -#: data_registry/templates/detail.html:213 +#: data_registry/templates/detail.html:210 msgid "Awards" msgstr "" -#: data_registry/templates/detail.html:215 +#: data_registry/templates/detail.html:212 msgid "Count of awards:" msgstr "" -#: data_registry/templates/detail.html:217 +#: data_registry/templates/detail.html:214 msgid "Count of award suppliers:" msgstr "" -#: data_registry/templates/detail.html:219 +#: data_registry/templates/detail.html:216 msgid "Count of award items:" msgstr "" -#: data_registry/templates/detail.html:224 +#: data_registry/templates/detail.html:221 msgid "Contracts" msgstr "" -#: data_registry/templates/detail.html:226 +#: data_registry/templates/detail.html:223 msgid "Count of contracts:" msgstr "" -#: data_registry/templates/detail.html:228 +#: data_registry/templates/detail.html:225 msgid "Count of contract items:" msgstr "" -#: data_registry/templates/detail.html:230 +#: data_registry/templates/detail.html:227 msgid "Count of contract transactions:" msgstr "" -#: data_registry/templates/detail.html:235 +#: data_registry/templates/detail.html:232 msgid "Documents" msgstr "" -#: data_registry/templates/detail.html:237 +#: data_registry/templates/detail.html:234 msgid "Count of documents:" msgstr "" -#: data_registry/templates/detail.html:242 +#: data_registry/templates/detail.html:239 msgid "Milestones" msgstr "" -#: data_registry/templates/detail.html:244 +#: data_registry/templates/detail.html:241 msgid "Count of milestones:" msgstr "" -#: data_registry/templates/detail.html:249 +#: data_registry/templates/detail.html:246 msgid "Amendments" msgstr "" -#: data_registry/templates/detail.html:251 +#: data_registry/templates/detail.html:248 msgid "Count of amendments:" msgstr "" -#: data_registry/templates/detail.html:258 +#: data_registry/templates/detail.html:255 msgid "This dataset has not yet been retrieved." msgstr "" -#: data_registry/templates/detail.html:278 +#: data_registry/templates/detail.html:275 msgid "Data quality" msgstr "" -#: data_registry/templates/detail.html:281 +#: data_registry/templates/detail.html:278 msgid "Summary" msgstr "" -#: data_registry/templates/detail.html:293 +#: data_registry/templates/detail.html:290 msgid "We have not yet prepared a data quality summary for this dataset." msgstr "" -#: data_registry/templates/detail.html:299 +#: data_registry/templates/detail.html:296 msgid "Last reviewed:" msgstr "" -#: data_registry/templates/detail.html:308 +#: data_registry/templates/detail.html:305 msgid "Access data" msgstr "" -#: data_registry/templates/detail.html:310 +#: data_registry/templates/detail.html:307 msgid "" "This OCDS dataset is available for download in JSON, Excel or CSV format. " "You can download the data for contracting processes in a specific year or " "for all time." msgstr "" -#: data_registry/templates/detail.html:315 +#: data_registry/templates/detail.html:312 msgid "" "Each contracting process is represented as one line of JSON text in the " ".jsonl file." msgstr "" -#: data_registry/templates/detail.html:320 +#: data_registry/templates/detail.html:317 msgid "" "The .jsonl file is compressed using Gzip. Windows users need 7-Zip, .gz file." msgstr "" -#: data_registry/templates/detail.html:329 +#: data_registry/templates/detail.html:326 msgid "" "Each contracting process is represented as one row in the main " "sheet. Other sheets link to it via the _link_main column." msgstr "" -#: data_registry/templates/detail.html:334 +#: data_registry/templates/detail.html:331 msgid "" "Excel files can have at most main.csv file. Other files link to it via the _link_main column." msgstr "" -#: data_registry/templates/detail.html:348 +#: data_registry/templates/detail.html:345 msgid "" "The .csv files are archived using tar and compressed using " "Gzip. Windows users need WinZip to decompress the .tar.gz file." msgstr "" -#: data_registry/templates/detail.html:367 +#: data_registry/templates/detail.html:364 msgid "Have questions, feedback on this dataset or content on this page?" msgstr "" -#: data_registry/templates/detail.html:372 +#: data_registry/templates/detail.html:369 msgid "Contact Data Support Team" msgstr "" @@ -435,7 +435,7 @@ msgstr "" #: data_registry/templates/includes/footer.html:129 #: data_registry/templates/includes/header.html:11 -#: data_registry/templates/index.html:6 data_registry/views.py:199 +#: data_registry/templates/index.html:6 data_registry/views.py:198 msgid "Open Contracting Partnership" msgstr "" @@ -452,7 +452,7 @@ msgid "Data Registry" msgstr "" #: data_registry/templates/index.html:7 data_registry/templates/index.html:22 -#: data_registry/views.py:210 +#: data_registry/views.py:209 msgid "Search for and access datasets by country" msgstr "" @@ -577,63 +577,63 @@ msgstr "" msgid "[data-registry] Re: %(collection)s" msgstr "" -#: data_registry/views.py:75 data_registry/views.py:325 +#: data_registry/views.py:74 data_registry/views.py:324 msgid "All" msgstr "" -#: data_registry/views.py:76 +#: data_registry/views.py:75 msgid "Past month" msgstr "" -#: data_registry/views.py:77 +#: data_registry/views.py:76 msgid "Past 6 months" msgstr "" -#: data_registry/views.py:78 +#: data_registry/views.py:77 msgid "Past year" msgstr "" -#: data_registry/views.py:79 +#: data_registry/views.py:78 msgid "Past 5 years" msgstr "" -#: data_registry/views.py:89 +#: data_registry/views.py:88 msgid "Parties data" msgstr "" -#: data_registry/views.py:90 +#: data_registry/views.py:89 msgid "Plannings data" msgstr "" -#: data_registry/views.py:91 +#: data_registry/views.py:90 msgid "Tenders data" msgstr "" -#: data_registry/views.py:92 +#: data_registry/views.py:91 msgid "Awards data" msgstr "" -#: data_registry/views.py:93 +#: data_registry/views.py:92 msgid "Contracts data" msgstr "" -#: data_registry/views.py:94 +#: data_registry/views.py:93 msgid "Documents data" msgstr "" -#: data_registry/views.py:95 +#: data_registry/views.py:94 msgid "Milestones data" msgstr "" -#: data_registry/views.py:96 +#: data_registry/views.py:95 msgid "Amendments data" msgstr "" -#: data_registry/views.py:184 +#: data_registry/views.py:183 #, python-format msgid "OCDS data for %(country)s: %(title)s" msgstr "" -#: data_registry/views.py:365 +#: data_registry/views.py:364 msgid "OCP Kingfisher Database" msgstr "" diff --git a/data_registry/locale/es/LC_MESSAGES/django.po b/data_registry/locale/es/LC_MESSAGES/django.po index 6e7a900..65ede4c 100644 --- a/data_registry/locale/es/LC_MESSAGES/django.po +++ b/data_registry/locale/es/LC_MESSAGES/django.po @@ -54,7 +54,7 @@ msgstr "Año anterior" msgid "More than a year ago" msgstr "Hace más de un año" -#: data_registry/admin.py:134 data_registry/admin.py:254 +#: data_registry/admin.py:134 data_registry/admin.py:248 msgid "Management" msgstr "Administración" @@ -72,7 +72,7 @@ msgid "Details" msgstr "Detalles" #: data_registry/admin.py:209 -msgid "failed" +msgid "unsuccessful" msgstr "fallido" #: data_registry/admin.py:281 @@ -648,59 +648,59 @@ msgstr "Volver arriba" msgid "[data-registry] Re: %(collection)s" msgstr "[data-registry] Re: %(collection)s" -#: data_registry/views.py:75 data_registry/views.py:325 +#: data_registry/views.py:74 data_registry/views.py:324 msgid "All" msgstr "Todos" -#: data_registry/views.py:76 +#: data_registry/views.py:75 msgid "Past month" msgstr "Mes pasado" -#: data_registry/views.py:77 +#: data_registry/views.py:76 msgid "Past 6 months" msgstr "Últimos 6 meses" -#: data_registry/views.py:78 +#: data_registry/views.py:77 msgid "Past year" msgstr "Año pasado" -#: data_registry/views.py:79 +#: data_registry/views.py:78 msgid "Past 5 years" msgstr "Últimos 5 años" -#: data_registry/views.py:89 +#: data_registry/views.py:88 msgid "Parties data" msgstr "Datos de las partes involucradas" -#: data_registry/views.py:90 +#: data_registry/views.py:89 msgid "Plannings data" msgstr "Datos de planeación" -#: data_registry/views.py:91 +#: data_registry/views.py:90 msgid "Tenders data" msgstr "Datos de licitación" -#: data_registry/views.py:92 +#: data_registry/views.py:91 msgid "Awards data" msgstr "Datos de adjudicación" -#: data_registry/views.py:93 +#: data_registry/views.py:92 msgid "Contracts data" msgstr "Datos de contratos" -#: data_registry/views.py:94 +#: data_registry/views.py:93 msgid "Documents data" msgstr "Datos de documentos" -#: data_registry/views.py:95 +#: data_registry/views.py:94 msgid "Milestones data" msgstr "Datos de hitos" -#: data_registry/views.py:96 +#: data_registry/views.py:95 msgid "Amendments data" msgstr "Datos de enmiendas" -#: data_registry/views.py:184 +#: data_registry/views.py:183 #, python-format msgid "OCDS data for %(country)s: %(title)s" msgstr "Datos OCDS de %(country)s: %(title)s" diff --git a/data_registry/locale/ru/LC_MESSAGES/django.po b/data_registry/locale/ru/LC_MESSAGES/django.po index 198aab9..e8bbea0 100644 --- a/data_registry/locale/ru/LC_MESSAGES/django.po +++ b/data_registry/locale/ru/LC_MESSAGES/django.po @@ -55,7 +55,7 @@ msgstr "Предыдущий год" msgid "More than a year ago" msgstr "Больше года тому назад" -#: data_registry/admin.py:134 data_registry/admin.py:254 +#: data_registry/admin.py:134 data_registry/admin.py:248 msgid "Management" msgstr "Управление" @@ -73,8 +73,8 @@ msgid "Details" msgstr "Подробности" #: data_registry/admin.py:209 -msgid "failed" -msgstr "Провалена" +msgid "unsuccessful" +msgstr "неудачная" #: data_registry/admin.py:281 msgid "Data availability" diff --git a/data_registry/models.py b/data_registry/models.py index 283d06b..743eeae 100644 --- a/data_registry/models.py +++ b/data_registry/models.py @@ -12,6 +12,14 @@ def format_datetime(dt): class JobQuerySet(models.QuerySet): + def successful(self): + """Return a query set of successfully completed jobs.""" + return self.complete().exclude(models.Exists(Task.objects.failed())) + + def unsuccessful(self): + """Return a query set of unsuccessfully completed jobs.""" + return self.complete().filter(models.Exists(Task.objects.failed())) + def complete(self): """Return a query set of complete jobs.""" return self.filter(status=Job.Status.COMPLETED) @@ -106,7 +114,7 @@ class Status(models.TextChoices): objects = JobQuerySet.as_manager() def __str__(self): - return f"{format_datetime(self.start)} .. {format_datetime(self.end)} ({self.id})" + return f"{format_datetime(self.start)} .. {format_datetime(self.end)} ({self.pk})" def __repr__(self): return f"{self.collection!r}: {self}" @@ -302,7 +310,7 @@ class Meta: verbose_name = "publication" def __str__(self): - return f"{self.title} ({self.id})" + return f"{self.title} ({self.pk})" def __repr__(self): return f"{self.country}: {self}" @@ -355,7 +363,17 @@ class Meta: verbose_name = "data license" def __str__(self): - return f"{self.name} ({self.id})" + return f"{self.name} ({self.pk})" + + +class TaskQuerySet(models.QuerySet): + def failed(self): + # https://docs.djangoproject.com/en/4.2/ref/models/expressions/#some-examples + return Task.objects.filter( + job=models.OuterRef("pk"), + status=Task.Status.COMPLETED, + result=Task.Result.FAILED, + ) class Task(models.Model): @@ -407,11 +425,13 @@ class Type(models.TextChoices): created = models.DateTimeField(auto_now_add=True) modified = models.DateTimeField(auto_now=True) + objects = TaskQuerySet.as_manager() + class Meta: verbose_name = "job task" def __str__(self): - return f"#{self.id}({self.type})" + return f"#{self.pk}({self.type})" def initiate(self): """Mark the task as started.""" diff --git a/data_registry/process_manager/__init__.py b/data_registry/process_manager/__init__.py index 45bf9ce..fae0388 100644 --- a/data_registry/process_manager/__init__.py +++ b/data_registry/process_manager/__init__.py @@ -1,7 +1,9 @@ +import datetime import logging from django.conf import settings from django.db import transaction +from django.utils.timezone import now from data_registry import models from data_registry.exceptions import IrrecoverableError, RecoverableError @@ -47,7 +49,11 @@ def process(collection: models.Collection) -> None: - If it failed temporarily, log the reason - If it failed permanently, fail the task and end the job - - If all tasks succeeded, end the job and update the collection's active job and last retrieved date. + - If all tasks succeeded: + + - End the job + - Update the collection's active job and last retrieved date + - Delete jobs that are more than a year older than the active job, but keep one other complete job In other words, this function advances each job by at most one task. As such, for all tasks of a job to succeed, this function needs to run at least as many times are there are tasks in the ``JOB_TASKS_PLAN`` setting. @@ -121,3 +127,15 @@ def process(collection: models.Collection) -> None: collection.save() logger.debug("Job %s has succeeded (%s: %s)", job, country, collection) + + # Keep the other most recent successful job as backup. + other_jobs = collection.job_set.exclude(pk=job.pk) + backup_job = other_jobs.successful().order_by("start").values_list("pk", flat=True).last() + if backup_job: + other_jobs = other_jobs.exclude(pk=backup_job) + + # There must be at most one incomplete job per collection, for deletion to not conflict with iteration. + for old_job in other_jobs.filter(start__lt=now() - datetime.timedelta(days=365)): + # Note: The Collect task's wipe() method can be slow. + old_job.delete() + logger.debug("Old job %s has been deleted (%s: %s)", old_job, country, collection) diff --git a/data_registry/process_manager/task/exporter.py b/data_registry/process_manager/task/exporter.py index fc81c05..74a1557 100644 --- a/data_registry/process_manager/task/exporter.py +++ b/data_registry/process_manager/task/exporter.py @@ -6,12 +6,12 @@ class Exporter(TaskManager): final_output = True def get_export(self): - return Export(self.job.id, basename="full.jsonl.gz") + return Export(self.job.pk, basename="full.jsonl.gz") def run(self): self.get_export().unlock() - publish({"job_id": self.job.id, "collection_id": self.job.context["process_id_pelican"]}, "exporter_init") + publish({"job_id": self.job.pk, "collection_id": self.job.context["process_id_pelican"]}, "exporter_init") def get_status(self): export = self.get_export() @@ -20,4 +20,4 @@ def get_status(self): @skip_if_not_started def wipe(self): - publish({"job_id": self.job.id}, "wiper_init") + publish({"job_id": self.job.pk}, "wiper_init") diff --git a/data_registry/process_manager/task/flattener.py b/data_registry/process_manager/task/flattener.py index de505a6..5466f18 100644 --- a/data_registry/process_manager/task/flattener.py +++ b/data_registry/process_manager/task/flattener.py @@ -7,15 +7,15 @@ class Flattener(TaskManager): final_output = True def get_exports(self): - for path in Export(self.job.id).get_convertible_paths(): - yield Export(self.job.id, basename=f"{path.name[:-9]}.csv.tar.gz") # remove .jsonl.gz + for path in Export(self.job.pk).get_convertible_paths(): + yield Export(self.job.pk, basename=f"{path.name[:-9]}.csv.tar.gz") # remove .jsonl.gz def run(self): for export in self.get_exports(): if export.running: export.unlock() - publish({"job_id": self.job.id}, "flattener_init") + publish({"job_id": self.job.pk}, "flattener_init") def get_status(self): for export in self.get_exports(): diff --git a/data_registry/process_manager/task/pelican.py b/data_registry/process_manager/task/pelican.py index fc20b15..83540f7 100644 --- a/data_registry/process_manager/task/pelican.py +++ b/data_registry/process_manager/task/pelican.py @@ -21,7 +21,7 @@ def run(self): data_version = self.job.context["data_version"] # set in Collect.get_status() compiled_collection_id = self.job.context["process_id_pelican"] # set in Process.get_status() - name = f"{spider}_{data_version}_{self.job.id}" + name = f"{spider}_{data_version}_{self.job.pk}" self.request( "POST", diff --git a/data_registry/views.py b/data_registry/views.py index 7d3554a..6c7cf77 100644 --- a/data_registry/views.py +++ b/data_registry/views.py @@ -6,7 +6,6 @@ from urllib.parse import urlencode, urljoin import requests -from dateutil.relativedelta import relativedelta from django import urls from django.conf import settings from django.db.models import Count, F, Q @@ -326,10 +325,10 @@ def excel_data(request, job_id, job_range=None): else: if job_range == "6M": end_date = date.today() - start_date = date.today() + relativedelta(months=-6) + start_date = date.today() - timedelta(days=180) if job_range == "1Y": end_date = date.today() - start_date = date.today() + relativedelta(months=-12) + start_date = date.today() - timedelta(days=365) if "|" in job_range: d_from, d_to = job_range.split("|") if d_from and d_to: @@ -354,7 +353,7 @@ def excel_data(request, job_id, job_range=None): else: logger.debug("File %s does not found. Excluding from export.", file_path) - start_date = start_date + relativedelta(months=+1) + start_date = start_date + timedelta(days=30) language = get_language() diff --git a/requirements.in b/requirements.in index 0bce286..6f3f73a 100644 --- a/requirements.in +++ b/requirements.in @@ -5,7 +5,6 @@ flatterer gunicorn[setproctitle] markdown-it-py psycopg2 -python-dateutil requests sentry-sdk yapw[perf] diff --git a/requirements.txt b/requirements.txt index 4bdd4a5..ed2481a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,9 +46,7 @@ pika==1.3.2 psycopg2==2.9.6 # via -r requirements.in python-dateutil==2.8.2 - # via - # -r requirements.in - # pandas + # via pandas pytz==2021.1 # via pandas requests==2.32.3 diff --git a/tests/data_registry/process_manager/test_process.py b/tests/data_registry/process_manager/test_process.py index 9f165f6..5d0c159 100644 --- a/tests/data_registry/process_manager/test_process.py +++ b/tests/data_registry/process_manager/test_process.py @@ -1,7 +1,9 @@ +from datetime import timedelta from unittest.mock import patch from django.conf import settings from django.test import TransactionTestCase +from django.utils.timezone import now from data_registry.models import Collection, Job, Task from data_registry.process_manager import process @@ -11,43 +13,85 @@ class ProcessTests(TransactionTestCase): fixtures = ["tests/fixtures/fixtures.json"] - def test(self): + def test_task_progress(self): collection = Collection.objects.get(pk=1) + job_set = collection.job_set - with patch("data_registry.process_manager.get_task_manager") as mock_get_task_manager: - mock_get_task_manager.return_value = TestTask() - + with patch("data_registry.process_manager.get_task_manager") as mock_process: + mock_process.return_value = TestTask() settings.JOB_TASKS_PLAN = ["test"] - # first call initializes job and runs first task + # First call initializes the job and runs the first task. process(collection) - - job = collection.job_set.first() - - # skip wipe - job.keep_all_data = True - job.save() - - self.assertIsNotNone(job) - self.assertIsNotNone(job.start) - + job = job_set.first() task = job.task_set.order_by("order").first() - self.assertEqual(Task.Status.RUNNING, task.status) + self.assertEqual(job.status, Job.Status.RUNNING) + self.assertIsNotNone(job.start) + self.assertIsNone(job.end) + self.assertEqual(task.status, Task.Status.RUNNING) self.assertIsNotNone(task.start) + self.assertIsNone(task.end) self.assertEqual("", task.result) - # next call updates running task state + # Last call updates the status of the running task and job. process(collection) - + job = job_set.first() task = job.task_set.order_by("order").first() - self.assertEqual(Task.Status.COMPLETED, task.status) + self.assertEqual(job.status, Job.Status.COMPLETED) + self.assertIsNotNone(job.start) + self.assertIsNotNone(job.end) + self.assertEqual(task.status, Task.Status.COMPLETED) + self.assertIsNotNone(task.start) self.assertIsNotNone(task.end) self.assertEqual("OK", task.result) - # the job plan contains only one task, therefore the job should be completed - # after completion of that task - job = collection.job_set.first() - self.assertIsNotNone(job.end) - self.assertEqual(Job.Status.COMPLETED, job.status) + def test_delete_jobs(self): + collection = Collection.objects.get(pk=1) + job_set = collection.job_set + + with ( + patch("data_registry.process_manager.get_task_manager") as mock_process, + patch("data_registry.signals.get_task_manager") as mock_delete, + ): + mock_process.return_value = mock_delete.return_value = TestTask() + settings.JOB_TASKS_PLAN = ["test"] + + for delete, keep in (([], []), ([], [500]), ([], [100]), ([500, 400], [200, 100])): + with self.subTest(delete=delete, keep=keep): + expected = [] + + # Create a new job that will complete after processing. + job = job_set.create(status=Job.Status.RUNNING, start=now()) + job.task_set.update(status=Task.Status.RUNNING) + expected.append(job) + + # Create some jobs that completed successfully and unsuccessfully. + for days in keep: + successful = job_set.create(status=Job.Status.COMPLETED, start=now() - timedelta(days=days)) + successful.task_set.update(status=Task.Status.COMPLETED, result=Task.Result.OK) + # An old, successful job is kept, if it is the other most recent successful job. + expected.append(successful) + + unsuccessful = job_set.create(status=Job.Status.COMPLETED, start=now() - timedelta(days=days)) + unsuccessful.task_set.update(status=Task.Status.COMPLETED, result=Task.Result.FAILED) + if days <= 365: # An old, unsuccessful job is deleted, unconditionally. + expected.append(unsuccessful) + + # Create old jobs that completed successfully and unsuccessfully. + for days in delete: + successful = job_set.create(status=Job.Status.COMPLETED, start=now() - timedelta(days=days)) + successful.task_set.update(status=Task.Status.COMPLETED, result=Task.Result.OK) + + unsuccessful = job_set.create(status=Job.Status.COMPLETED, start=now() - timedelta(days=days)) + unsuccessful.task_set.update(status=Task.Status.COMPLETED, result=Task.Result.FAILED) + + process(collection) + + self.assertEqual(list(job_set.all()), expected) + + # Delete jobs for the next subtest. + collection.active_job = None + collection.save() + job_set.all().delete() diff --git a/tests/data_registry/test_models.py b/tests/data_registry/test_models.py index 59fb25d..105c16b 100644 --- a/tests/data_registry/test_models.py +++ b/tests/data_registry/test_models.py @@ -30,7 +30,7 @@ def test_protect(self): self.active_job.delete() # Mock get_task_manager() to avoid NotImplementedError. - with patch("data_registry.signals.get_task_manager") as mock_get_task_manager: - mock_get_task_manager.return_value = TestTask() + with patch("data_registry.signals.get_task_manager") as mock_signals: + mock_signals.return_value = TestTask() self.inactive_job.delete() diff --git a/tests/data_registry/test_views.py b/tests/data_registry/test_views.py index de510b7..0fc03a2 100644 --- a/tests/data_registry/test_views.py +++ b/tests/data_registry/test_views.py @@ -38,7 +38,7 @@ def setUpTestData(cls): public=True, ) cls.job = cls.collection1.active_job = cls.collection1.job_set.create( - id=99, # to match tests/fixtures directory + pk=99, # to match tests/fixtures directory date_from=datetime.date(2010, 2, 1), date_to=datetime.date(2023, 9, 30), ) @@ -83,16 +83,16 @@ def test_search(self): response = Client().get("/en/search/") self.assertTemplateUsed("search.html") - self.assertContains(response, f'"/en/publication/{self.collection1.id}"') - self.assertContains(response, f'"/en/publication/{self.collection2.id}"') + self.assertContains(response, f'"/en/publication/{self.collection1.pk}"') + self.assertContains(response, f'"/en/publication/{self.collection2.pk}"') @patch("exporter.util.Export.get_files") def test_detail(self, get_files): get_files.return_value = {"jsonl": {"full": 123, "by_year": [{"year": 2022, "size": 1}]}} - url = f"/en/publication/{self.collection1.id}/download?name=2022.jsonl.gz" + url = f"/en/publication/{self.collection1.pk}/download?name=2022.jsonl.gz" with self.assertNumQueries(2): - response = Client().get(f"/en/publication/{self.collection1.id}") + response = Client().get(f"/en/publication/{self.collection1.pk}") self.assertTemplateUsed("detail.html") self.assertContains( @@ -114,21 +114,21 @@ def test_collection_not_found(self): def test_download_export_invalid_suffix(self): with self.assertNumQueries(0): - response = Client().get(f"/en/publication/{self.collection1.id}/download?name=invalid") + response = Client().get(f"/en/publication/{self.collection1.pk}/download?name=invalid") self.assertEqual(response.status_code, 400) self.assertEqual(response.content, b"The name query string parameter is invalid") def test_download_export_empty_parameter(self): with self.assertNumQueries(0): - response = Client().get(f"/en/publication/{self.collection1.id}/download?name=") + response = Client().get(f"/en/publication/{self.collection1.pk}/download?name=") self.assertEqual(response.status_code, 400) self.assertEqual(response.content, b"The name query string parameter is invalid") def test_download_export_waiting(self): with self.assertNumQueries(1): - response = Client().get(f"/en/publication/{self.collection_no_files.id}/download?name=2000.jsonl.gz") + response = Client().get(f"/en/publication/{self.collection_no_files.pk}/download?name=2000.jsonl.gz") self.assertEqual(response.status_code, 404) self.assertEqual(response.content, b"File not found") @@ -136,7 +136,7 @@ def test_download_export_waiting(self): @patch("exporter.util.Export.lockfile", new_callable=PropertyMock) def test_download_export_running(self, exists): with self.assertNumQueries(1): - response = Client().get(f"/en/publication/{self.collection1.id}/download?name=2000.jsonl.gz") + response = Client().get(f"/en/publication/{self.collection1.pk}/download?name=2000.jsonl.gz") self.assertEqual(response.status_code, 404) self.assertEqual(response.content, b"File not found") @@ -149,7 +149,7 @@ def test_download_export_completed(self): ): with self.subTest(suffix=suffix), self.assertNumQueries(1): response = Client().get( - f"/en/publication/{self.collection1.id}/download?name=2000.{suffix}", + f"/en/publication/{self.collection1.pk}/download?name=2000.{suffix}", HTTP_ACCEPT_ENCODING="gzip", ) @@ -179,7 +179,7 @@ def test_publications_api(self): expected = [ { - "id": self.collection1.id, + "id": self.collection1.pk, "title": "National Directorate of Public Procurement (DNCP)", "country": "Paraguay (EN)", "region": "LAC", @@ -194,7 +194,7 @@ def test_publications_api(self): "date_to": "2023-09-30", }, { - "id": self.collection2.id, + "id": self.collection2.pk, "title": "Title", "country": "Canada (EN)", "region": "LAC", @@ -209,7 +209,7 @@ def test_publications_api(self): "date_to": None, }, { - "id": self.collection_no_files.id, + "id": self.collection_no_files.pk, "title": "Test", "country": "", "region": "",