diff --git a/alembic/versions/20240821_7a2fcaac8b63_add_loan_identifier_column_to_playtime_tables.py b/alembic/versions/20240821_7a2fcaac8b63_add_loan_identifier_column_to_playtime_tables.py index 6609abd4e..2155a73dd 100644 --- a/alembic/versions/20240821_7a2fcaac8b63_add_loan_identifier_column_to_playtime_tables.py +++ b/alembic/versions/20240821_7a2fcaac8b63_add_loan_identifier_column_to_playtime_tables.py @@ -22,82 +22,15 @@ def upgrade() -> None: op.add_column( "playtime_entries", - sa.Column("loan_identifier", sa.String(length=50), nullable=False, default=""), - ) - - op.drop_constraint( - "unique_playtime_entry", - "playtime_entries", - type_="unique", - ) - - op.create_unique_constraint( - "unique_playtime_entry", - "playtime_entries", - [ - "tracking_id", - "identifier_str", - "collection_name", - "library_name", - "loan_identifier", - ], + sa.Column("loan_identifier", sa.String(length=40), nullable=False, default=""), ) op.add_column( "playtime_summaries", - sa.Column("loan_identifier", sa.String(length=50), nullable=False, default=""), - ) - - op.drop_constraint( - "unique_playtime_summary", - "playtime_summaries", - type_="unique", - ) - - op.create_unique_constraint( - "unique_playtime_summary", - "playtime_summaries", - [ - "timestamp", - "identifier_str", - "collection_name", - "library_name", - "loan_identifier", - ], + sa.Column("loan_identifier", sa.String(length=40), nullable=False, default=""), ) def downgrade() -> None: - op.drop_constraint( - "unique_playtime_entry", - "playtime_entries", - type_="unique", - ) - op.drop_column("playtime_entries", "loan_identifier") - - op.create_unique_constraint( - "unique_playtime_entry", - "playtime_entries", - [ - "tracking_id", - "timestamp", - "identifier_str", - "collection_name", - "library_name", - ], - ) - - op.drop_constraint( - "unique_playtime_summary", - "playtime_summaries", - type_="unique", - ) - op.drop_column("playtime_summaries", "loan_identifier") - - op.create_unique_constraint( - "unique_playtime_summary", - "playtime_summaries", - ["timestamp", "identifier_str", "collection_name", "library_name"], - ) diff --git a/src/palace/manager/api/controller/playtime_entries.py b/src/palace/manager/api/controller/playtime_entries.py index ec58ef9b6..2043d4c1f 100644 --- a/src/palace/manager/api/controller/playtime_entries.py +++ b/src/palace/manager/api/controller/playtime_entries.py @@ -1,6 +1,6 @@ from __future__ import annotations -import hashlib +from datetime import timedelta import flask from pydantic import ValidationError @@ -16,19 +16,20 @@ from palace.manager.api.problem_details import NOT_FOUND_ON_REMOTE from palace.manager.core.problem_details import INVALID_INPUT from palace.manager.core.query.playtime_entries import PlaytimeEntries +from palace.manager.sqlalchemy.constants import EditionConstants from palace.manager.sqlalchemy.model.collection import Collection from palace.manager.sqlalchemy.model.identifier import Identifier from palace.manager.sqlalchemy.model.library import Library from palace.manager.sqlalchemy.model.licensing import LicensePool -from palace.manager.sqlalchemy.model.patron import Loan, Patron +from palace.manager.sqlalchemy.model.patron import Loan from palace.manager.sqlalchemy.util import get_one -def resolve_loan_identifier(patron: Patron, loan: Loan | None) -> str: +def resolve_loan_identifier(loan: Loan | None) -> str: def sha1(msg): - return hashlib.sha1(msg.encode()).hexdigest() + return - return sha1(f"loan: {loan.id}") if loan else sha1(f"patron:{patron.id}") + return sha1(f"loan: {loan.id}") if loan else "no-loan-found" class PlaytimeEntriesController(CirculationManagerController): @@ -61,6 +62,15 @@ def track_playtimes(self, collection_id, identifier_type, identifier_idn): except ValidationError as ex: return INVALID_INPUT.detailed(ex.json()) + min_time_entry = min([x.during_minute for x in data.time_entries]) + max_time_entry = max([x.during_minute for x in data.time_entries]) + + default_loan_period = timedelta( + collection.default_loan_period( + library=library, medium=EditionConstants.AUDIO_MEDIUM + ) + ) + loan = self._db.execute( select(Loan) .select_from(Loan) @@ -68,13 +78,13 @@ def track_playtimes(self, collection_id, identifier_type, identifier_idn): .where( LicensePool.identifier == identifier, Loan.patron == flask.request.patron, + Loan.start >= min_time_entry, + Loan.start + default_loan_period < max_time_entry, ) .order_by(Loan.start.desc()) ).first() - loan_identifier = resolve_loan_identifier( - loan=loan, patron=flask.request.patron - ) + loan_identifier = resolve_loan_identifier(loan=loan) responses, summary = PlaytimeEntries.insert_playtime_entries( self._db, diff --git a/src/palace/manager/scripts/playtime_entries.py b/src/palace/manager/scripts/playtime_entries.py index 279383081..b73c96e30 100644 --- a/src/palace/manager/scripts/playtime_entries.py +++ b/src/palace/manager/scripts/playtime_entries.py @@ -12,7 +12,9 @@ import dateutil.parser import pytz from sqlalchemy.sql.expression import and_, distinct, false, select, true -from sqlalchemy.sql.functions import coalesce, count, sum +from sqlalchemy.sql.functions import coalesce, count +from sqlalchemy.sql.functions import max as sql_max +from sqlalchemy.sql.functions import sum from palace.manager.core.config import Configuration from palace.manager.scripts.base import Script @@ -57,7 +59,7 @@ def do_run(self): PlaytimeEntry.timestamp <= cut_off, ) - # Aggregate entries per identifier-timestamp-collection-library grouping. + # Aggregate entries per identifier-timestamp-collection-library-loan_identifier grouping. # The label forms of the identifier, collection, and library are also # factored in, in case any of the foreign keys are missing. # Since timestamps should be on minute-boundaries the aggregation @@ -218,13 +220,23 @@ def do_run(self): self.log.warning(temp.read()) def _fetch_report_records(self, start: datetime, until: datetime) -> Query: + # The loan count query returns only non-empty string isbns and titles if there is more + # than one row returned with the grouping. This way we ensure that we do not + # count the same loan twice in the case we have when a + # 1. a single loan with identifier A + # 2. and one or more playtime summaries with title A or no title or isbn A or no isbn + # 3. and one more playtime summaries with title B, isbn B + # This situation can occur when the title and isbn metadata associated with an ID changes due to a feed + # update that occurs between playlist entry posts. + # in this case we just associate the loan identifier with one unique combination of the list of titles and isbn + # values. loan_count_query = ( select( PlaytimeSummary.identifier_str.label("identifier_str2"), PlaytimeSummary.collection_name.label("collection_name2"), PlaytimeSummary.library_name.label("library_name2"), - coalesce(PlaytimeSummary.isbn, "").label("isbn2"), - coalesce(PlaytimeSummary.title, "").label("title2"), + sql_max(coalesce(PlaytimeSummary.isbn, "")).label("isbn2"), + sql_max(coalesce(PlaytimeSummary.title, "")).label("title2"), count(distinct(PlaytimeSummary.loan_identifier)).label("loan_count"), ) .where(PlaytimeSummary.timestamp.between(start, until)) @@ -232,8 +244,6 @@ def _fetch_report_records(self, start: datetime, until: datetime) -> Query: PlaytimeSummary.identifier_str, PlaytimeSummary.collection_name, PlaytimeSummary.library_name, - PlaytimeSummary.isbn, - PlaytimeSummary.title, PlaytimeSummary.identifier_id, ) .subquery() @@ -260,14 +270,14 @@ def _fetch_report_records(self, start: datetime, until: datetime) -> Query: .subquery() ) - combined = self._db.query(seconds_query, loan_count_query).join( + combined = self._db.query(seconds_query, loan_count_query).outerjoin( loan_count_query, and_( - loan_count_query.c.identifier_str2 == seconds_query.c.identifier_str, - loan_count_query.c.collection_name2 == seconds_query.c.collection_name, - loan_count_query.c.library_name2 == seconds_query.c.library_name, - loan_count_query.c.isbn2 == seconds_query.c.isbn, - loan_count_query.c.title2 == seconds_query.c.title, + seconds_query.c.identifier_str == loan_count_query.c.identifier_str2, + seconds_query.c.collection_name == loan_count_query.c.collection_name2, + seconds_query.c.library_name == loan_count_query.c.library_name2, + seconds_query.c.isbn == loan_count_query.c.isbn2, + seconds_query.c.title == loan_count_query.c.title2, ), ) combined_sq = combined.subquery() @@ -279,7 +289,7 @@ def _fetch_report_records(self, start: datetime, until: datetime) -> Query: combined_sq.c.isbn, combined_sq.c.title, combined_sq.c.total_seconds_played, - combined_sq.c.loan_count, + coalesce(combined_sq.c.loan_count, 0), ).order_by( combined_sq.c.collection_name, combined_sq.c.library_name, diff --git a/src/palace/manager/sqlalchemy/model/time_tracking.py b/src/palace/manager/sqlalchemy/model/time_tracking.py index 509f90bd1..02544ca26 100644 --- a/src/palace/manager/sqlalchemy/model/time_tracking.py +++ b/src/palace/manager/sqlalchemy/model/time_tracking.py @@ -74,7 +74,7 @@ class PlaytimeEntry(Base): collection: Mapped[Collection] = relationship("Collection", uselist=False) library: Mapped[Library] = relationship("Library", uselist=False) - loan_identifier = Column(String(50), nullable=False) + loan_identifier = Column(String(40), nullable=False) __table_args__ = ( UniqueConstraint( @@ -82,7 +82,6 @@ class PlaytimeEntry(Base): "identifier_str", "collection_name", "library_name", - "loan_identifier", name="unique_playtime_entry", ), ) @@ -131,7 +130,7 @@ class PlaytimeSummary(Base): title = Column(String) isbn = Column(String) - loan_identifier = Column(String(50), nullable=False) + loan_identifier = Column(String(40), nullable=False) identifier: Mapped[Identifier] = relationship("Identifier", uselist=False) collection: Mapped[Collection] = relationship("Collection", uselist=False) @@ -143,7 +142,6 @@ class PlaytimeSummary(Base): "identifier_str", "collection_name", "library_name", - "loan_identifier", name="unique_playtime_summary", ), ) diff --git a/tests/manager/api/controller/test_playtime_entries.py b/tests/manager/api/controller/test_playtime_entries.py index 7fa48abdf..74142d329 100644 --- a/tests/manager/api/controller/test_playtime_entries.py +++ b/tests/manager/api/controller/test_playtime_entries.py @@ -106,7 +106,7 @@ def test_track_playtime_duplicate_id_ok( identifier = db.identifier() collection = db.default_collection() library = db.default_library() - + patron = db.patron() # Attach the identifier to the collection pool = db.licensepool( db.edition( @@ -114,9 +114,8 @@ def test_track_playtime_duplicate_id_ok( ), collection=collection, ) - patron = db.patron() - loan_identifier = resolve_loan_identifier(loan=None, patron=patron) + loan_identifier = resolve_loan_identifier(loan=None) db.session.add( PlaytimeEntry( diff --git a/tests/manager/scripts/test_playtime_entries.py b/tests/manager/scripts/test_playtime_entries.py index d93fd522b..fac8fe9db 100644 --- a/tests/manager/scripts/test_playtime_entries.py +++ b/tests/manager/scripts/test_playtime_entries.py @@ -10,7 +10,6 @@ from freezegun import freeze_time from sqlalchemy.sql.expression import and_, null -from palace.manager.api.controller.playtime_entries import resolve_loan_identifier from palace.manager.api.model.time_tracking import PlaytimeTimeEntry from palace.manager.core.config import Configuration from palace.manager.core.equivalents_coverage import ( @@ -87,8 +86,7 @@ def test_summation(self, db: DatabaseTransactionFixture): collection2 = db.collection(name=c2_old_name) library2 = db.library(name=l2_old_name) loan_identifier, loan_identifier2, loan_identifier3, loan_identifier4 = ( - resolve_loan_identifier(db.patron(external_identifier=str(x)), None) - for x in range(0, 4) + f"loan-id:{x}" for x in range(0, 4) ) entries = create_playtime_entries( db, @@ -286,7 +284,7 @@ def test_reap_processed_entries(self, db: DatabaseTransactionFixture): identifier = db.identifier() collection = db.default_collection() library = db.default_library() - loan_identifier = resolve_loan_identifier(db.patron(), None) + loan_identifier = "loan-id" entries = create_playtime_entries( db, identifier, @@ -544,17 +542,16 @@ def test_do_run( collection2 = db.collection() library2 = db.library() - def create_loan_identifier(patron_id: str) -> str: - patron = db.patron(external_identifier=patron_id) - return resolve_loan_identifier(patron=patron, loan=None) + identifier3 = db.identifier() - loan_identifiers = [create_loan_identifier(str(x)) for x in range(1, 6)] + loan_identifiers = [f"loan_id:{x}" for x in range(1, 7)] ( loan_identifier, loan_identifier2, loan_identifier3, loan_identifier4, loan_identifier5, + loan_identifier6, ) = loan_identifiers isbn_ids: dict[str, Identifier] = { @@ -623,6 +620,42 @@ def create_loan_identifier(patron_id: str) -> str: loan_identifier5, ) + playtime( + db.session, + identifier3, + collection2, + library2, + dt1m(10), + 800, + loan_identifier6, + ) + + # log a summary where a title that was previously unavailable is now available for the same loan + edition2 = db.edition(title="A test") + edition2.primary_identifier = identifier3 + + playtime( + db.session, + identifier3, + collection2, + library2, + dt1m(15), + 13, + loan_identifier6, + ) + + edition2.title = "Z test" + + playtime( + db.session, + identifier3, + collection2, + library2, + dt1m(20), + 4, + loan_identifier6, + ) + reporting_name = "test cm" with ( patch("palace.manager.scripts.playtime_entries.csv.writer") as writer, @@ -639,7 +672,7 @@ def create_loan_identifier(patron_id: str) -> str: # Assert assert ( - writer().writerow.call_count == 6 + writer().writerow.call_count == 9 ) # 1 header, 5 identifier,collection,library entries cutoff = date1m(0).replace(day=1) @@ -671,6 +704,42 @@ def create_loan_identifier(patron_id: str) -> str: 1, ) ), + call( + ( + column1, + identifier3.urn, + None, + collection2.name, + library2.name, + None, + 800, + 0, + ) + ), + call( + ( + column1, + identifier3.urn, + None, + collection2.name, + library2.name, + "A test", + 13, + 0, + ) + ), + call( + ( + column1, + identifier3.urn, + None, + collection2.name, + library2.name, + "Z test", + 4, + 1, + ) + ), call( ( column1, diff --git a/tests/manager/sqlalchemy/model/test_time_tracking.py b/tests/manager/sqlalchemy/model/test_time_tracking.py index 48a01ef7c..c93a33e11 100644 --- a/tests/manager/sqlalchemy/model/test_time_tracking.py +++ b/tests/manager/sqlalchemy/model/test_time_tracking.py @@ -143,7 +143,7 @@ def test_constraints(self, db: DatabaseTransactionFixture): loan_identifier=loan_id, ) assert ( - f"Key (tracking_id, identifier_str, collection_name, library_name, loan_identifier)=(tracking-id-0, {identifier.urn}, {collection.name}, {library.name}, {loan_id}) already exists" + f"Key (tracking_id, identifier_str, collection_name, library_name)=(tracking-id-0, {identifier.urn}, {collection.name}, {library.name}) already exists" in raised.exconly() ) @@ -185,7 +185,7 @@ def test_contraints(self, db: DatabaseTransactionFixture): loan_identifier=loan_id, ) assert ( - f'Key ("timestamp", identifier_str, collection_name, library_name, loan_identifier)=(2000-01-01 12:00:00+00, {identifier.urn}, {collection.name}, {library.name}, {loan_id}) already exists' + f'Key ("timestamp", identifier_str, collection_name, library_name)=(2000-01-01 12:00:00+00, {identifier.urn}, {collection.name}, {library.name}) already exists' in raised.exconly() )