[PP-1456] Add loan count column to audio book playback time reports. (#2002)
ThePalaceProject · Sep 4, 2024 · 3f5d5ae · 3f5d5ae
1 parent de62c4e
commit 3f5d5ae
Show file tree

Hide file tree

Showing 8 changed files with 407 additions and 38 deletions.
diff --git a/alembic/versions/20240821_7a2fcaac8b63_add_loan_identifier_column_to_playtime_tables.py b/alembic/versions/20240821_7a2fcaac8b63_add_loan_identifier_column_to_playtime_tables.py
@@ -0,0 +1,59 @@
+"""Add loan_identifier column to playtime tables.
+
+Revision ID: 7a2fcaac8b63
+Revises: 7ba553f3f80d
+Create Date: 2024-08-21 23:23:48.085451+00:00
+
+"""
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.orm.session import Session
+
+# revision identifiers, used by Alembic.
+revision = "7a2fcaac8b63"
+down_revision = "7ba553f3f80d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add ``loan_identifier`` to both playtime tables and widen the summary key.
+
+    The new column is NOT NULL, so rows that already exist need a value at the
+    moment the column is created.  A Python-side ``default`` is never emitted
+    in the ``ALTER TABLE`` DDL, so a temporary ``server_default`` of ``""`` is
+    used to backfill existing rows.  It is removed again straight away so the
+    schema matches the model columns, which declare no default.
+    """
+    for table_name in ("playtime_entries", "playtime_summaries"):
+        op.add_column(
+            table_name,
+            sa.Column(
+                "loan_identifier",
+                sa.String(length=40),
+                nullable=False,
+                server_default="",
+            ),
+        )
+        # Drop the backfill default: new rows must supply the value explicitly.
+        op.alter_column(table_name, "loan_identifier", server_default=None)
+
+    # Rebuild the unique constraint so that it also covers loan_identifier.
+    op.drop_constraint("unique_playtime_summary", "playtime_summaries", type_="unique")
+
+    op.create_unique_constraint(
+        "unique_playtime_summary",
+        "playtime_summaries",
+        [
+            "timestamp",
+            "identifier_str",
+            "collection_name",
+            "library_name",
+            "loan_identifier",
+        ],
+    )
+
+
+def downgrade() -> None:
+    """Undo the upgrade: remove ``loan_identifier`` and restore the old summary key."""
+    summaries_table = "playtime_summaries"
+    constraint_name = "unique_playtime_summary"
+
+    op.drop_column("playtime_entries", "loan_identifier")
+
+    # Drop the widened constraint first, then the column it covers.
+    op.drop_constraint(constraint_name, summaries_table, type_="unique")
+
+    op.drop_column(summaries_table, "loan_identifier")
+
+    # Recreate the constraint over its pre-migration column set.
+    op.create_unique_constraint(
+        constraint_name,
+        summaries_table,
+        ["timestamp", "identifier_str", "collection_name", "library_name"],
+    )
diff --git a/src/palace/manager/api/controller/playtime_entries.py b/src/palace/manager/api/controller/playtime_entries.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
+import hashlib
+
 import flask
 from pydantic import ValidationError
+from sqlalchemy import select
 
 from palace.manager.api.controller.circulation_manager import (
     CirculationManagerController,
@@ -16,8 +19,23 @@
 from palace.manager.sqlalchemy.model.collection import Collection
 from palace.manager.sqlalchemy.model.identifier import Identifier
 from palace.manager.sqlalchemy.model.library import Library
+from palace.manager.sqlalchemy.model.licensing import LicensePool
+from palace.manager.sqlalchemy.model.patron import Loan
 from palace.manager.sqlalchemy.util import get_one
 
+MISSING_LOAN_IDENTIFIER = "LOAN_NOT_FOUND"
+
+
+def resolve_loan_identifier(loan: Loan | None) -> str:
+    """Return the identifier stored with playtime data for ``loan``.
+
+    :param loan: The loan the playtime belongs to, or None if no loan was found.
+    :return: The SHA-1 hex digest (40 characters) of ``"loan: <loan.id>"``, or
+        MISSING_LOAN_IDENTIFIER when ``loan`` is None.
+    """
+    if loan is None:
+        return MISSING_LOAN_IDENTIFIER
+
+    # Only the digest of the loan id is returned, never the id itself.
+    return hashlib.sha1(f"loan: {loan.id}".encode()).hexdigest()
+
 
 class PlaytimeEntriesController(CirculationManagerController):
     def track_playtimes(self, collection_id, identifier_type, identifier_idn):
@@ -49,8 +67,32 @@ def track_playtimes(self, collection_id, identifier_type, identifier_idn):
         except ValidationError as ex:
             return INVALID_INPUT.detailed(ex.json())
 
+        # attempt to resolve a loan associated with the patron, identifier, in the time period
+        entry_max_start_time = max([x.during_minute for x in data.time_entries])
+        entry_min_end_time = min([x.during_minute for x in data.time_entries])
+
+        loan = self._db.scalars(
+            select(Loan)
+            .select_from(Loan)
+            .join(LicensePool)
+            .where(
+                LicensePool.identifier == identifier,
+                Loan.patron == flask.request.patron,
+                Loan.start <= entry_max_start_time,
+                Loan.end > entry_min_end_time,
+            )
+            .order_by(Loan.start.desc())
+        ).first()
+
+        loan_identifier = resolve_loan_identifier(loan=loan)
+
         responses, summary = PlaytimeEntries.insert_playtime_entries(
-            self._db, identifier, collection, library, data
+            self._db,
+            identifier,
+            collection,
+            library,
+            data,
+            loan_identifier,
         )
 
         response_data = PlaytimeEntriesPostResponse(

diff --git a/src/palace/manager/core/query/playtime_entries.py b/src/palace/manager/core/query/playtime_entries.py
@@ -28,6 +28,7 @@ def insert_playtime_entries(
         collection: Collection,
         library: Library,
         data: PlaytimeEntriesPost,
+        loan_identifier: str,
     ) -> tuple[list, PlaytimeEntriesPostSummary]:
         """Insert into the database playtime entries from a request"""
         responses = []
@@ -59,6 +60,7 @@ def insert_playtime_entries(
                         library_name=library.name,
                         timestamp=entry.during_minute,
                         total_seconds_played=entry.seconds_played,
+                        loan_identifier=loan_identifier,
                     )
             except IntegrityError as ex:
                 logging.getLogger("Time Tracking").error(

diff --git a/src/palace/manager/scripts/playtime_entries.py b/src/palace/manager/scripts/playtime_entries.py
@@ -11,7 +11,9 @@
 
 import dateutil.parser
 import pytz
-from sqlalchemy.sql.expression import false, true
+from sqlalchemy.sql.expression import and_, distinct, false, select, true
+from sqlalchemy.sql.functions import coalesce, count
+from sqlalchemy.sql.functions import max as sql_max
 from sqlalchemy.sql.functions import sum
 
 from palace.manager.core.config import Configuration
@@ -57,7 +59,7 @@ def do_run(self):
             PlaytimeEntry.timestamp <= cut_off,
         )
 
-        # Aggregate entries per identifier-timestamp-collection-library grouping.
+        # Aggregate entries per identifier-timestamp-collection-library-loan_identifier grouping.
         # The label forms of the identifier, collection, and library are also
         # factored in, in case any of the foreign keys are missing.
         # Since timestamps should be on minute-boundaries the aggregation
@@ -71,6 +73,7 @@ def group_key_for_entry(e: PlaytimeEntry) -> tuple:
                 e.identifier_str,
                 e.collection_name,
                 e.library_name,
+                e.loan_identifier,
             )
 
         by_group = defaultdict(int)
@@ -88,6 +91,7 @@ def group_key_for_entry(e: PlaytimeEntry) -> tuple:
                 identifier_str,
                 collection_name,
                 library_name,
+                loan_identifier,
             ) = group
 
             # Update the playtime summary.
@@ -101,9 +105,11 @@ def group_key_for_entry(e: PlaytimeEntry) -> tuple:
                 identifier_str=identifier_str,
                 collection_name=collection_name,
                 library_name=library_name,
+                loan_identifier=loan_identifier,
             )
             self.log.info(
-                f"Added {seconds} to {identifier_str} ({collection_name} in {library_name}) for {timestamp}: new total {playtime.total_seconds_played}."
+                f"Added {seconds} to {identifier_str} ({collection_name} in {library_name} with loan id of "
+                f"{loan_identifier}) for {timestamp}: new total {playtime.total_seconds_played}."
             )
 
         self._db.commit()
@@ -214,33 +220,80 @@ def do_run(self):
                 self.log.warning(temp.read())
 
     def _fetch_report_records(self, start: datetime, until: datetime) -> Query:
+        """Build the query that produces the rows of the playtime report.
+
+        :param start: Inclusive lower bound for PlaytimeSummary.timestamp.
+        :param until: Exclusive upper bound for PlaytimeSummary.timestamp.
+        :return: A query yielding (identifier_str, collection_name, library_name,
+            isbn, title, total_seconds_played, loan_count) rows, ordered by
+            collection name, library name and identifier.
+        """
-        return (
-            self._db.query(PlaytimeSummary)
-            .with_entities(
+        # The loan count query collapses isbn and title to a single value per
+        # group (the max of the values, with NULL treated as ""), so that the
+        # same loan is not counted twice when there is:
+        # 1. a single loan with identifier A,
+        # 2. one or more playtime summaries with title A (or no title) and
+        #    isbn A (or no isbn),
+        # 3. and one or more playtime summaries with title B and isbn B.
+        # This situation can occur when the title and isbn metadata associated
+        # with an ID changes due to a feed update that occurs between playtime
+        # entry posts.  In that case the loan count is associated with just one
+        # of the title/isbn combinations.
+        loan_count_query = (
+            select(
+                PlaytimeSummary.identifier_str.label("identifier_str2"),
+                PlaytimeSummary.collection_name.label("collection_name2"),
+                PlaytimeSummary.library_name.label("library_name2"),
+                sql_max(coalesce(PlaytimeSummary.isbn, "")).label("isbn2"),
+                sql_max(coalesce(PlaytimeSummary.title, "")).label("title2"),
+                count(distinct(PlaytimeSummary.loan_identifier)).label("loan_count"),
+            )
+            .where(
+                # Half-open interval [start, until).  BETWEEN is inclusive on
+                # both ends, so a row stamped exactly at "until" could be
+                # picked up by two adjacent reporting periods.
+                PlaytimeSummary.timestamp >= start,
+                PlaytimeSummary.timestamp < until,
+            )
+            .group_by(
                 PlaytimeSummary.identifier_str,
                 PlaytimeSummary.collection_name,
                 PlaytimeSummary.library_name,
-                PlaytimeSummary.isbn,
-                PlaytimeSummary.title,
-                sum(PlaytimeSummary.total_seconds_played),
+                PlaytimeSummary.identifier_id,
             )
-            .filter(
-                PlaytimeSummary.timestamp >= start,
-                PlaytimeSummary.timestamp < until,
+            .subquery()
+        )
+
+        seconds_query = (
+            select(
+                PlaytimeSummary.identifier_str,
+                PlaytimeSummary.collection_name,
+                PlaytimeSummary.library_name,
+                coalesce(PlaytimeSummary.isbn, "").label("isbn"),
+                coalesce(PlaytimeSummary.title, "").label("title"),
+                sum(PlaytimeSummary.total_seconds_played).label("total_seconds_played"),
             )
+            .where(
+                # Same half-open interval as the loan count query above.
+                PlaytimeSummary.timestamp >= start,
+                PlaytimeSummary.timestamp < until,
+            )
             .group_by(
                 PlaytimeSummary.identifier_str,
                 PlaytimeSummary.collection_name,
                 PlaytimeSummary.library_name,
-                PlaytimeSummary.identifier_id,
                 PlaytimeSummary.isbn,
                 PlaytimeSummary.title,
+                PlaytimeSummary.identifier_id,
             )
-            .order_by(
-                PlaytimeSummary.collection_name,
-                PlaytimeSummary.library_name,
-                PlaytimeSummary.identifier_str,
-            )
+            .subquery()
+        )
+
+        # Attach the loan count to the seconds row whose isbn/title match the
+        # single isbn/title combination chosen by the loan count query.
+        combined = self._db.query(seconds_query, loan_count_query).outerjoin(
+            loan_count_query,
+            and_(
+                seconds_query.c.identifier_str == loan_count_query.c.identifier_str2,
+                seconds_query.c.collection_name == loan_count_query.c.collection_name2,
+                seconds_query.c.library_name == loan_count_query.c.library_name2,
+                seconds_query.c.isbn == loan_count_query.c.isbn2,
+                seconds_query.c.title == loan_count_query.c.title2,
+            ),
+        )
+        combined_sq = combined.subquery()
+
+        return self._db.query(
+            combined_sq.c.identifier_str,
+            combined_sq.c.collection_name,
+            combined_sq.c.library_name,
+            combined_sq.c.isbn,
+            combined_sq.c.title,
+            combined_sq.c.total_seconds_played,
+            # Rows without a matching loan count row report a count of 0.
+            coalesce(combined_sq.c.loan_count, 0),
+        ).order_by(
+            combined_sq.c.collection_name,
+            combined_sq.c.library_name,
+            combined_sq.c.identifier_str,
         )
 
 
@@ -256,6 +309,7 @@ def _produce_report(writer: Writer, date_label, records=None) -> None:
             "library",
             "title",
             "total seconds",
+            "loan count",
         )
     )
     for (
@@ -265,15 +319,17 @@ def _produce_report(writer: Writer, date_label, records=None) -> None:
         isbn,
         title,
         total,
+        loan_count,
     ) in records:
         row = (
             date_label,
             identifier_str,
-            isbn,
+            None if isbn == "" else isbn,
             collection_name,
             library_name,
-            title,
+            None if title == "" else title,
             total,
+            loan_count,
         )
         # Write the row to the CSV
         writer.writerow(row)
diff --git a/src/palace/manager/sqlalchemy/model/time_tracking.py b/src/palace/manager/sqlalchemy/model/time_tracking.py
@@ -74,6 +74,8 @@ class PlaytimeEntry(Base):
     collection: Mapped[Collection] = relationship("Collection", uselist=False)
     library: Mapped[Library] = relationship("Library", uselist=False)
 
+    loan_identifier = Column(String(40), nullable=False)
+
     __table_args__ = (
         UniqueConstraint(
             "tracking_id",
@@ -128,6 +130,7 @@ class PlaytimeSummary(Base):
 
     title = Column(String)
     isbn = Column(String)
+    loan_identifier = Column(String(40), nullable=False)
 
     identifier: Mapped[Identifier] = relationship("Identifier", uselist=False)
     collection: Mapped[Collection] = relationship("Collection", uselist=False)
@@ -139,6 +142,7 @@ class PlaytimeSummary(Base):
             "identifier_str",
             "collection_name",
             "library_name",
+            "loan_identifier",
             name="unique_playtime_summary",
         ),
     )
@@ -155,6 +159,7 @@ def add(
         identifier_str: str,
         collection_name: str,
         library_name: str | None,
+        loan_identifier: str,
     ) -> PlaytimeSummary:
         """Add playtime (in seconds) to it's associated minute-level summary record."""
         # Update each label with its current value, if its foreign key is present.
@@ -178,6 +183,7 @@ def add(
             "collection_name": None if collection else collection_name,
             "library_id": library.id if library else None,
             "library_name": None if library else library_name,
+            "loan_identifier": loan_identifier,
         }
         lookup_keys = {k: v for k, v in _potential_lookup_keys.items() if v is not None}
         additional_columns = {