Merge pull request #525 from maykinmedia/feature/502-make-report-xlsx
[#502] Make destruction report xlsx file instead of csv
SilviaAmAm authored Dec 3, 2024
2 parents e2471d2 + 4c2b867 commit 07cfcab
Showing 13 changed files with 211 additions and 64 deletions.
3 changes: 2 additions & 1 deletion backend/requirements/base.in
@@ -37,4 +37,5 @@ celery
# Additional libraries
zgw-consumers
furl
python-slugify
python-slugify
XlsxWriter
2 changes: 2 additions & 0 deletions backend/requirements/base.txt
@@ -261,5 +261,7 @@ webauthn==2.1.0
# via django-two-factor-auth
wrapt==1.14.1
# via elastic-apm
xlsxwriter==3.2.0
# via -r requirements/base.in
zgw-consumers==0.36.1
# via -r requirements/base.in
8 changes: 8 additions & 0 deletions backend/requirements/ci.txt
@@ -280,6 +280,8 @@ elastic-apm==6.22.0
# via
# -c requirements/base.txt
# -r requirements/base.txt
et-xmlfile==2.0.0
# via openpyxl
face==20.1.1
# via
# -c requirements/base.txt
@@ -369,6 +371,8 @@ multidict==6.0.5
# via yarl
mypy-extensions==1.0.0
# via black
openpyxl==3.1.5
# via -r requirements/test-tools.in
orderedmultidict==1.0.1
# via
# -c requirements/base.txt
@@ -630,6 +634,10 @@ wrapt==1.14.1
# -r requirements/base.txt
# elastic-apm
# vcrpy
xlsxwriter==3.2.0
# via
# -c requirements/base.txt
# -r requirements/base.txt
yarl==1.9.4
# via vcrpy
zgw-consumers==0.36.1
13 changes: 13 additions & 0 deletions backend/requirements/dev.txt
@@ -324,6 +324,11 @@ elastic-apm==6.22.0
# via
# -c requirements/ci.txt
# -r requirements/ci.txt
et-xmlfile==2.0.0
# via
# -c requirements/ci.txt
# -r requirements/ci.txt
# openpyxl
face==20.1.1
# via
# -c requirements/ci.txt
@@ -456,6 +461,10 @@ mypy-extensions==1.0.0
# -c requirements/ci.txt
# -r requirements/ci.txt
# black
openpyxl==3.1.5
# via
# -c requirements/ci.txt
# -r requirements/ci.txt
orderedmultidict==1.0.1
# via
# -c requirements/ci.txt
@@ -820,6 +829,10 @@ wrapt==1.14.1
# -r requirements/ci.txt
# elastic-apm
# vcrpy
xlsxwriter==3.2.0
# via
# -c requirements/ci.txt
# -r requirements/ci.txt
yarl==1.9.4
# via
# -c requirements/ci.txt
3 changes: 3 additions & 0 deletions backend/requirements/test-tools.in
@@ -17,6 +17,9 @@ testfixtures
vcrpy
pytest-playwright
docker
# XlsxWriter is more suitable for writing large files, but doesn't support reading them.
# So for the tests we use openpyxl to check the created excel files.
openpyxl

# Documentation
sphinx
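
The comment added to test-tools.in above sums up the split: XlsxWriter only writes workbooks, so the tests read the generated files back with openpyxl. A minimal round-trip sketch of that pattern (the file path and cell values here are illustrative, not taken from the project):

import xlsxwriter
from openpyxl import load_workbook

# Write a small workbook with XlsxWriter (write-only, suited to large files).
workbook = xlsxwriter.Workbook("example.xlsx")  # illustrative path
worksheet = workbook.add_worksheet(name="Deleted zaken")
worksheet.write_row(0, 0, ["url", "identificatie"])
worksheet.write_row(1, 0, ["http://zaken.nl/api/v1/zaken/111-111-111", "ZAAK-01"])
workbook.close()

# Read it back with openpyxl, the way the updated tests verify the report.
wb = load_workbook(filename="example.xlsx")
rows = list(wb["Deleted zaken"].iter_rows(values_only=True))
assert rows[0] == ("url", "identificatie")
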
43 changes: 43 additions & 0 deletions backend/src/openarchiefbeheer/destruction/destruction_report.py
@@ -0,0 +1,43 @@
from dataclasses import dataclass
from typing import IO

from django.utils.translation import gettext

import xlsxwriter
from glom import glom
from xlsxwriter.worksheet import Worksheet

from openarchiefbeheer.zaken.api.constants import ZAAK_METADATA_FIELDS_MAPPINGS

from .constants import InternalStatus
from .models import DestructionList


@dataclass
class DestructionReportGenerator:
    destruction_list: DestructionList

    def add_zaken_table(self, worksheet: Worksheet, start_row: int = 0) -> None:
        worksheet.write_row(
            start_row, 0, [field["name"] for field in ZAAK_METADATA_FIELDS_MAPPINGS]
        )

        for row_count, item in enumerate(
            self.destruction_list.items.filter(
                processing_status=InternalStatus.succeeded
            ).iterator(chunk_size=1000)
        ):
            data = [
                glom(item.extra_zaak_data, field["path"], default="")
                for field in ZAAK_METADATA_FIELDS_MAPPINGS
            ]
            worksheet.write_row(start_row + row_count + 1, 0, data)

    def generate_destruction_report(self, file: IO) -> None:
        workbook = xlsxwriter.Workbook(file.name, options={"in_memory": False})

        worksheet = workbook.add_worksheet(name=gettext("Deleted zaken"))

        self.add_zaken_table(worksheet)

        workbook.close()
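
In add_zaken_table, each cell value is resolved with glom against the stored extra_zaak_data using the mapping's "path", falling back to an empty string when a path is missing. A small sketch of that lookup; the entry and data shapes below are assumptions based on how the generator uses field["name"] and field["path"], since ZAAK_METADATA_FIELDS_MAPPINGS itself is not part of this diff:

from glom import glom

# Assumed shape of one mapping entry and of item.extra_zaak_data (illustrative).
field = {"name": "zaaktype omschrijving", "path": "zaaktype.omschrijving"}
extra_zaak_data = {"zaaktype": {"omschrijving": "Tralala zaaktype"}}

assert glom(extra_zaak_data, field["path"], default="") == "Tralala zaaktype"
assert glom(extra_zaak_data, "resultaat", default="") == ""  # missing path -> default
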
44 changes: 8 additions & 36 deletions backend/src/openarchiefbeheer/destruction/models.py
@@ -1,4 +1,3 @@
import csv
import logging
import traceback
import uuid as _uuid
@@ -220,48 +219,21 @@ def abort_destruction(self) -> None:
        self.save()

    def generate_destruction_report(self) -> None:
        from .destruction_report import DestructionReportGenerator

        if not self.status == ListStatus.deleted:
            logger.warning("The destruction list has not been deleted yet.")
            return

        fieldnames = [
            "url",
            "einddatum",
            "resultaat",
            "startdatum",
            "omschrijving",
            "identificatie",
            "zaaktype url",
            "zaaktype omschrijving",
            "selectielijst procestype nummer",
        ]
        with NamedTemporaryFile(mode="w", newline="", delete_on_close=False) as f_tmp:
            writer = csv.DictWriter(f_tmp, fieldnames=fieldnames)
            writer.writeheader()
            for item in self.items.filter(
                processing_status=InternalStatus.succeeded
            ).iterator(chunk_size=1000):
                data = {
                    **item.extra_zaak_data,
                    **{
                        "zaaktype url": item.extra_zaak_data["zaaktype"]["url"],
                        "zaaktype omschrijving": item.extra_zaak_data["zaaktype"][
                            "omschrijving"
                        ],
                        "selectielijst procestype nummer": item.extra_zaak_data[
                            "zaaktype"
                        ]["selectielijst_procestype"]["nummer"],
                    },
                }
                del data["zaaktype"]

                writer.writerow(data)

        generator = DestructionReportGenerator(destruction_list=self)
        with NamedTemporaryFile(mode="wb", delete_on_close=False) as f_tmp:
            generator.generate_destruction_report(f_tmp)
            f_tmp.close()
            with open(f_tmp.name, mode="r") as f:

            with open(f_tmp.name, mode="rb") as f:
                django_file = File(f)
                self.destruction_report.save(
                    f"report_{slugify(self.name)}.csv", django_file
                    f"report_{slugify(self.name)}.xlsx", django_file
                )

        self.save()
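
The model method above switches the temporary file from text mode ("w") to binary ("wb") and reopens it with "rb" before handing it to Django's File, because an .xlsx file is a zip archive rather than plain text like the old CSV (delete_on_close, as used here, requires Python 3.12+). A standalone sketch of that binary round trip, independent of the model (sheet name and row content are illustrative):

from tempfile import NamedTemporaryFile

import xlsxwriter

# Write a workbook into a named temporary file, then reopen it in binary mode.
with NamedTemporaryFile(mode="wb", suffix=".xlsx", delete_on_close=False) as f_tmp:
    workbook = xlsxwriter.Workbook(f_tmp.name, options={"in_memory": False})
    workbook.add_worksheet(name="Deleted zaken").write_row(0, 0, ["url", "identificatie"])
    workbook.close()
    f_tmp.close()  # flush before reopening by name

    with open(f_tmp.name, mode="rb") as f:
        assert f.read(2) == b"PK"  # zip signature: the report is binary, not text
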
65 changes: 54 additions & 11 deletions backend/src/openarchiefbeheer/destruction/tests/test_models.py
@@ -4,8 +4,10 @@
from django.core.exceptions import ObjectDoesNotExist
from django.test import TestCase
from django.utils import timezone
from django.utils.translation import gettext

from freezegun import freeze_time
from openpyxl import load_workbook
from privates.test import temp_private_root
from requests import HTTPError
from requests_mock import Mocker
@@ -344,25 +346,66 @@ def test_generate_destruction_report(self):

        destruction_list.refresh_from_db()

        destruction_list.destruction_report
        lines = [line for line in destruction_list.destruction_report.readlines()]
        wb = load_workbook(filename=destruction_list.destruction_report.path)
        sheet_deleted_zaken = wb[gettext("Deleted zaken")]
        rows = list(sheet_deleted_zaken.iter_rows(values_only=True))

        self.assertEqual(len(lines), 4)
        self.assertEqual(len(rows), 4)
        self.assertEqual(
            lines[0],
            b"url,einddatum,resultaat,startdatum,omschrijving,identificatie,zaaktype url,zaaktype omschrijving,selectielijst procestype nummer\n",
            rows[0],
            (
                "url",
                "einddatum",
                "resultaat",
                "startdatum",
                "omschrijving",
                "identificatie",
                "zaaktype url",
                "zaaktype omschrijving",
                "selectielijst procestype nummer",
            ),
        )
        self.assertEqual(
            lines[1],
            b"http://zaken.nl/api/v1/zaken/111-111-111,2022-01-01,http://zaken.nl/api/v1/resultaten/111-111-111,2020-01-01,Test description 1,ZAAK-01,http://catalogi.nl/api/v1/zaaktypen/111-111-111,Tralala zaaktype,1\n",
            rows[1],
            (
                "http://zaken.nl/api/v1/zaken/111-111-111",
                "2022-01-01",
                "http://zaken.nl/api/v1/resultaten/111-111-111",
                "2020-01-01",
                "Test description 1",
                "ZAAK-01",
                "http://catalogi.nl/api/v1/zaaktypen/111-111-111",
                "Tralala zaaktype",
                1,
            ),
        )
        self.assertEqual(
            lines[2],
            b"http://zaken.nl/api/v1/zaken/111-111-222,2022-01-02,http://zaken.nl/api/v1/resultaten/111-111-222,2020-01-02,Test description 2,ZAAK-02,http://catalogi.nl/api/v1/zaaktypen/111-111-111,Tralala zaaktype,1\n",
            rows[2],
            (
                "http://zaken.nl/api/v1/zaken/111-111-222",
                "2022-01-02",
                "http://zaken.nl/api/v1/resultaten/111-111-222",
                "2020-01-02",
                "Test description 2",
                "ZAAK-02",
                "http://catalogi.nl/api/v1/zaaktypen/111-111-111",
                "Tralala zaaktype",
                1,
            ),
        )
        self.assertEqual(
            lines[3],
            b"http://zaken.nl/api/v1/zaken/111-111-333,2022-01-03,http://zaken.nl/api/v1/resultaten/111-111-333,2020-01-03,Test description 3,ZAAK-03,http://catalogi.nl/api/v1/zaaktypen/111-111-222,Tralala zaaktype,2\n",
            rows[3],
            (
                "http://zaken.nl/api/v1/zaken/111-111-333",
                "2022-01-03",
                "http://zaken.nl/api/v1/resultaten/111-111-333",
                "2020-01-03",
                "Test description 3",
                "ZAAK-03",
                "http://catalogi.nl/api/v1/zaaktypen/111-111-222",
                "Tralala zaaktype",
                2,
            ),
        )

    def test_zaak_creation_skipped_if_internal_status_succeeded(self):
71 changes: 58 additions & 13 deletions backend/src/openarchiefbeheer/destruction/tests/test_tasks.py
@@ -7,6 +7,7 @@
from django.utils.translation import gettext as _, ngettext

from freezegun import freeze_time
from openpyxl import load_workbook
from privates.test import temp_private_root
from requests import HTTPError
from requests_mock import Mocker
@@ -439,16 +440,38 @@ def test_process_list(self):
            ).exists()
        )

        lines = [line for line in destruction_list.destruction_report.readlines()]
        wb = load_workbook(filename=destruction_list.destruction_report.path)
        sheet_deleted_zaken = wb[_("Deleted zaken")]
        rows = list(sheet_deleted_zaken.iter_rows(values_only=True))

        self.assertEqual(len(lines), 3)
        self.assertEqual(len(rows), 3)
        self.assertEqual(
            lines[1],
            b"http://zaken.nl/api/v1/zaken/111-111-111,2022-01-01,http://zaken.nl/api/v1/resultaten/111-111-111,2020-01-01,Test description 1,ZAAK-01,http://catalogue-api.nl/zaaktypen/111-111-111,Aangifte behandelen,1\n",
            rows[1],
            (
                "http://zaken.nl/api/v1/zaken/111-111-111",
                "2022-01-01",
                "http://zaken.nl/api/v1/resultaten/111-111-111",
                "2020-01-01",
                "Test description 1",
                "ZAAK-01",
                "http://catalogue-api.nl/zaaktypen/111-111-111",
                "Aangifte behandelen",
                1,
            ),
        )
        self.assertEqual(
            lines[2],
            b"http://zaken.nl/api/v1/zaken/222-222-222,2022-01-02,http://zaken.nl/api/v1/resultaten/111-111-222,2020-01-02,Test description 2,ZAAK-02,http://catalogue-api.nl/zaaktypen/111-111-111,Aangifte behandelen,1\n",
            rows[2],
            (
                "http://zaken.nl/api/v1/zaken/222-222-222",
                "2022-01-02",
                "http://zaken.nl/api/v1/resultaten/111-111-222",
                "2020-01-02",
                "Test description 2",
                "ZAAK-02",
                "http://catalogue-api.nl/zaaktypen/111-111-111",
                "Aangifte behandelen",
                1,
            ),
        )

        m_zaak.assert_called()
@@ -583,19 +606,41 @@ def test_complete_and_notify(self):
        self.assertEqual(destruction_list.processing_status, InternalStatus.succeeded)
        self.assertEqual(
            destruction_list.destruction_report.name,
            "destruction_reports/2024/10/09/report_some-destruction-list.csv",
            "destruction_reports/2024/10/09/report_some-destruction-list.xlsx",
        )

        lines = [line for line in destruction_list.destruction_report.readlines()]
        wb = load_workbook(filename=destruction_list.destruction_report.path)
        sheet_deleted_zaken = wb[_("Deleted zaken")]
        rows = list(sheet_deleted_zaken.iter_rows(values_only=True))

        self.assertEqual(len(lines), 2)
        self.assertEqual(len(rows), 2)
        self.assertEqual(
            lines[0],
            b"url,einddatum,resultaat,startdatum,omschrijving,identificatie,zaaktype url,zaaktype omschrijving,selectielijst procestype nummer\n",
            rows[0],
            (
                "url",
                "einddatum",
                "resultaat",
                "startdatum",
                "omschrijving",
                "identificatie",
                "zaaktype url",
                "zaaktype omschrijving",
                "selectielijst procestype nummer",
            ),
        )
        self.assertEqual(
            lines[1],
            b"http://zaken.nl/api/v1/zaken/111-111-111,2022-01-01,http://zaken.nl/api/v1/resultaten/111-111-111,2020-01-01,Test description 1,ZAAK-01,http://catalogi.nl/api/v1/zaaktypen/111-111-111,Tralala zaaktype,1\n",
            rows[1],
            (
                "http://zaken.nl/api/v1/zaken/111-111-111",
                "2022-01-01",
                "http://zaken.nl/api/v1/resultaten/111-111-111",
                "2020-01-01",
                "Test description 1",
                "ZAAK-01",
                "http://catalogi.nl/api/v1/zaaktypen/111-111-111",
                "Tralala zaaktype",
                1,
            ),
        )

    @override_settings(CELERY_TASK_ALWAYS_EAGER=True)