Skip to content

Commit

Permalink
[PP-1456] Add loan count column to audio book playback time reports. (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
dbernstein authored Sep 4, 2024
1 parent de62c4e commit 3f5d5ae
Show file tree
Hide file tree
Showing 8 changed files with 407 additions and 38 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Add loan_identifier column to playtime tables.
Revision ID: 7a2fcaac8b63
Revises: 7ba553f3f80d
Create Date: 2024-08-21 23:23:48.085451+00:00
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.orm.session import Session

# revision identifiers, used by Alembic.
revision = "7a2fcaac8b63"
down_revision = "7ba553f3f80d"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add a ``loan_identifier`` column to both playtime tables.

    The column is added to ``playtime_entries`` and ``playtime_summaries`` as
    a ``NOT NULL`` string of up to 40 characters. The
    ``unique_playtime_summary`` constraint on ``playtime_summaries`` is then
    rebuilt so that ``loan_identifier`` is part of the uniqueness key.
    """
    for table_name in ("playtime_entries", "playtime_summaries"):
        # ``default=`` on a Column is a Python-side, insert-time default and is
        # not emitted in the ALTER TABLE statement, so a NOT NULL column could
        # not be added to a table that already contains rows. A server default
        # back-fills the existing rows with the empty string instead.
        op.add_column(
            table_name,
            sa.Column(
                "loan_identifier",
                sa.String(length=40),
                nullable=False,
                server_default="",
            ),
        )
        # Remove the temporary server default again so the final schema is a
        # plain NOT NULL column, matching how the models declare it.
        op.alter_column(table_name, "loan_identifier", server_default=None)

    # Replace the unique constraint with one that also covers the new column.
    op.drop_constraint("unique_playtime_summary", "playtime_summaries", type_="unique")

    op.create_unique_constraint(
        "unique_playtime_summary",
        "playtime_summaries",
        [
            "timestamp",
            "identifier_str",
            "collection_name",
            "library_name",
            "loan_identifier",
        ],
    )


def downgrade() -> None:
    """Remove ``loan_identifier`` and restore the previous summary constraint.

    Drops the column from ``playtime_entries``, then drops the
    ``unique_playtime_summary`` constraint and the column from
    ``playtime_summaries``, and finally recreates the constraint over the
    four columns it covered before this revision.
    """
    column_name = "loan_identifier"
    summary_table = "playtime_summaries"
    constraint_name = "unique_playtime_summary"

    op.drop_column("playtime_entries", column_name)

    # The constraint references the column, so it is removed before the column.
    op.drop_constraint(constraint_name, summary_table, type_="unique")
    op.drop_column(summary_table, column_name)

    previous_key_columns = [
        "timestamp",
        "identifier_str",
        "collection_name",
        "library_name",
    ]
    op.create_unique_constraint(constraint_name, summary_table, previous_key_columns)
44 changes: 43 additions & 1 deletion src/palace/manager/api/controller/playtime_entries.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

import hashlib

import flask
from pydantic import ValidationError
from sqlalchemy import select

from palace.manager.api.controller.circulation_manager import (
CirculationManagerController,
Expand All @@ -16,8 +19,23 @@
from palace.manager.sqlalchemy.model.collection import Collection
from palace.manager.sqlalchemy.model.identifier import Identifier
from palace.manager.sqlalchemy.model.library import Library
from palace.manager.sqlalchemy.model.licensing import LicensePool
from palace.manager.sqlalchemy.model.patron import Loan
from palace.manager.sqlalchemy.util import get_one

# Placeholder stored when no loan could be matched to the playtime entries.
MISSING_LOAN_IDENTIFIER = "LOAN_NOT_FOUND"


def resolve_loan_identifier(loan: Loan | None) -> str:
    """Return the identifier string recorded for ``loan``.

    :param loan: The matched loan, or ``None`` when no loan was found.
    :return: The 40-character hexadecimal SHA-1 digest of ``"loan: <loan.id>"``
        when a loan is given, otherwise ``MISSING_LOAN_IDENTIFIER``.
    """
    if loan is None:
        return MISSING_LOAN_IDENTIFIER

    # Hash the loan id rather than storing it directly; the hex digest is
    # always 40 characters long.
    return hashlib.sha1(f"loan: {loan.id}".encode()).hexdigest()


class PlaytimeEntriesController(CirculationManagerController):
def track_playtimes(self, collection_id, identifier_type, identifier_idn):
Expand Down Expand Up @@ -49,8 +67,32 @@ def track_playtimes(self, collection_id, identifier_type, identifier_idn):
except ValidationError as ex:
return INVALID_INPUT.detailed(ex.json())

# attempt to resolve a loan associated with the patron, identifier, in the time period
entry_max_start_time = max([x.during_minute for x in data.time_entries])
entry_min_end_time = min([x.during_minute for x in data.time_entries])

loan = self._db.scalars(
select(Loan)
.select_from(Loan)
.join(LicensePool)
.where(
LicensePool.identifier == identifier,
Loan.patron == flask.request.patron,
Loan.start <= entry_max_start_time,
Loan.end > entry_min_end_time,
)
.order_by(Loan.start.desc())
).first()

loan_identifier = resolve_loan_identifier(loan=loan)

responses, summary = PlaytimeEntries.insert_playtime_entries(
self._db, identifier, collection, library, data
self._db,
identifier,
collection,
library,
data,
loan_identifier,
)

response_data = PlaytimeEntriesPostResponse(
Expand Down
2 changes: 2 additions & 0 deletions src/palace/manager/core/query/playtime_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def insert_playtime_entries(
collection: Collection,
library: Library,
data: PlaytimeEntriesPost,
loan_identifier: str,
) -> tuple[list, PlaytimeEntriesPostSummary]:
"""Insert into the database playtime entries from a request"""
responses = []
Expand Down Expand Up @@ -59,6 +60,7 @@ def insert_playtime_entries(
library_name=library.name,
timestamp=entry.during_minute,
total_seconds_played=entry.seconds_played,
loan_identifier=loan_identifier,
)
except IntegrityError as ex:
logging.getLogger("Time Tracking").error(
Expand Down
96 changes: 76 additions & 20 deletions src/palace/manager/scripts/playtime_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import dateutil.parser
import pytz
from sqlalchemy.sql.expression import false, true
from sqlalchemy.sql.expression import and_, distinct, false, select, true
from sqlalchemy.sql.functions import coalesce, count
from sqlalchemy.sql.functions import max as sql_max
from sqlalchemy.sql.functions import sum

from palace.manager.core.config import Configuration
Expand Down Expand Up @@ -57,7 +59,7 @@ def do_run(self):
PlaytimeEntry.timestamp <= cut_off,
)

# Aggregate entries per identifier-timestamp-collection-library grouping.
# Aggregate entries per identifier-timestamp-collection-library-loan_identifier grouping.
# The label forms of the identifier, collection, and library are also
# factored in, in case any of the foreign keys are missing.
# Since timestamps should be on minute-boundaries the aggregation
Expand All @@ -71,6 +73,7 @@ def group_key_for_entry(e: PlaytimeEntry) -> tuple:
e.identifier_str,
e.collection_name,
e.library_name,
e.loan_identifier,
)

by_group = defaultdict(int)
Expand All @@ -88,6 +91,7 @@ def group_key_for_entry(e: PlaytimeEntry) -> tuple:
identifier_str,
collection_name,
library_name,
loan_identifier,
) = group

# Update the playtime summary.
Expand All @@ -101,9 +105,11 @@ def group_key_for_entry(e: PlaytimeEntry) -> tuple:
identifier_str=identifier_str,
collection_name=collection_name,
library_name=library_name,
loan_identifier=loan_identifier,
)
self.log.info(
f"Added {seconds} to {identifier_str} ({collection_name} in {library_name}) for {timestamp}: new total {playtime.total_seconds_played}."
f"Added {seconds} to {identifier_str} ({collection_name} in {library_name} with loan id of "
f"{loan_identifier}) for {timestamp}: new total {playtime.total_seconds_played}."
)

self._db.commit()
Expand Down Expand Up @@ -214,33 +220,80 @@ def do_run(self):
self.log.warning(temp.read())

def _fetch_report_records(self, start: datetime, until: datetime) -> Query:
return (
self._db.query(PlaytimeSummary)
.with_entities(
# The loan count query returns only non-empty string isbns and titles if there is more
# than one row returned with the grouping. This way we ensure that we do not
# count the same loan twice in the case where we have
# 1. a single loan with identifier A,
# 2. one or more playtime summaries with title A or no title, or isbn A or no isbn,
# 3. and one or more playtime summaries with title B and isbn B.
# This situation can occur when the title and isbn metadata associated with an ID changes due to a feed
# update that occurs between playtime entry posts.
# In this case we just associate the loan identifier with one unique combination of the list of titles and isbn
# values.
loan_count_query = (
select(
PlaytimeSummary.identifier_str.label("identifier_str2"),
PlaytimeSummary.collection_name.label("collection_name2"),
PlaytimeSummary.library_name.label("library_name2"),
sql_max(coalesce(PlaytimeSummary.isbn, "")).label("isbn2"),
sql_max(coalesce(PlaytimeSummary.title, "")).label("title2"),
count(distinct(PlaytimeSummary.loan_identifier)).label("loan_count"),
)
.where(PlaytimeSummary.timestamp.between(start, until))
.group_by(
PlaytimeSummary.identifier_str,
PlaytimeSummary.collection_name,
PlaytimeSummary.library_name,
PlaytimeSummary.isbn,
PlaytimeSummary.title,
sum(PlaytimeSummary.total_seconds_played),
PlaytimeSummary.identifier_id,
)
.filter(
PlaytimeSummary.timestamp >= start,
PlaytimeSummary.timestamp < until,
.subquery()
)

seconds_query = (
select(
PlaytimeSummary.identifier_str,
PlaytimeSummary.collection_name,
PlaytimeSummary.library_name,
coalesce(PlaytimeSummary.isbn, "").label("isbn"),
coalesce(PlaytimeSummary.title, "").label("title"),
sum(PlaytimeSummary.total_seconds_played).label("total_seconds_played"),
)
.where(PlaytimeSummary.timestamp.between(start, until))
.group_by(
PlaytimeSummary.identifier_str,
PlaytimeSummary.collection_name,
PlaytimeSummary.library_name,
PlaytimeSummary.identifier_id,
PlaytimeSummary.isbn,
PlaytimeSummary.title,
PlaytimeSummary.identifier_id,
)
.order_by(
PlaytimeSummary.collection_name,
PlaytimeSummary.library_name,
PlaytimeSummary.identifier_str,
)
.subquery()
)

combined = self._db.query(seconds_query, loan_count_query).outerjoin(
loan_count_query,
and_(
seconds_query.c.identifier_str == loan_count_query.c.identifier_str2,
seconds_query.c.collection_name == loan_count_query.c.collection_name2,
seconds_query.c.library_name == loan_count_query.c.library_name2,
seconds_query.c.isbn == loan_count_query.c.isbn2,
seconds_query.c.title == loan_count_query.c.title2,
),
)
combined_sq = combined.subquery()

return self._db.query(
combined_sq.c.identifier_str,
combined_sq.c.collection_name,
combined_sq.c.library_name,
combined_sq.c.isbn,
combined_sq.c.title,
combined_sq.c.total_seconds_played,
coalesce(combined_sq.c.loan_count, 0),
).order_by(
combined_sq.c.collection_name,
combined_sq.c.library_name,
combined_sq.c.identifier_str,
)


Expand All @@ -256,6 +309,7 @@ def _produce_report(writer: Writer, date_label, records=None) -> None:
"library",
"title",
"total seconds",
"loan count",
)
)
for (
Expand All @@ -265,15 +319,17 @@ def _produce_report(writer: Writer, date_label, records=None) -> None:
isbn,
title,
total,
loan_count,
) in records:
row = (
date_label,
identifier_str,
isbn,
None if isbn == "" else isbn,
collection_name,
library_name,
title,
None if title == "" else title,
total,
loan_count,
)
# Write the row to the CSV
writer.writerow(row)
6 changes: 6 additions & 0 deletions src/palace/manager/sqlalchemy/model/time_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ class PlaytimeEntry(Base):
collection: Mapped[Collection] = relationship("Collection", uselist=False)
library: Mapped[Library] = relationship("Library", uselist=False)

loan_identifier = Column(String(40), nullable=False)

__table_args__ = (
UniqueConstraint(
"tracking_id",
Expand Down Expand Up @@ -128,6 +130,7 @@ class PlaytimeSummary(Base):

title = Column(String)
isbn = Column(String)
loan_identifier = Column(String(40), nullable=False)

identifier: Mapped[Identifier] = relationship("Identifier", uselist=False)
collection: Mapped[Collection] = relationship("Collection", uselist=False)
Expand All @@ -139,6 +142,7 @@ class PlaytimeSummary(Base):
"identifier_str",
"collection_name",
"library_name",
"loan_identifier",
name="unique_playtime_summary",
),
)
Expand All @@ -155,6 +159,7 @@ def add(
identifier_str: str,
collection_name: str,
library_name: str | None,
loan_identifier: str,
) -> PlaytimeSummary:
"""Add playtime (in seconds) to it's associated minute-level summary record."""
# Update each label with its current value, if its foreign key is present.
Expand All @@ -178,6 +183,7 @@ def add(
"collection_name": None if collection else collection_name,
"library_id": library.id if library else None,
"library_name": None if library else library_name,
"loan_identifier": loan_identifier,
}
lookup_keys = {k: v for k, v in _potential_lookup_keys.items() if v is not None}
additional_columns = {
Expand Down
Loading

0 comments on commit 3f5d5ae

Please sign in to comment.