From 389a7583fb15b70ccd6dd6e117b5b6b5b1a44955 Mon Sep 17 00:00:00 2001 From: Paul Butcher Date: Wed, 21 Aug 2024 16:56:09 +0100 Subject: [PATCH] Add script to revert Miro suppressions (#2692) * first draft of unsuppression * add image unsuppression * Apply auto-formatting rules * Add pipeline 2024-08-15 (#2688) * Loop suppress miro (#2689) * suppress miro images in a loop * Apply auto-formatting rules * add dry run option * Apply auto-formatting rules --------- Co-authored-by: Buildkite on behalf of Wellcome Collection * New ArchivesDigital format, transformed from CALM Material type (#2687) * new ArchivesDigital format, transformed from CALM Material type * Apply auto-formatting rules * update label to Archives - Digital * update works ingestor tests --------- Co-authored-by: Buildkite on behalf of Wellcome Collection * add 2024-08-15/pipeline_config, missed in previous commit [ci skip] * Turn the EBSCO adapter schedule back on (#2690) * tidy and dry * tidy and dry * Apply auto-formatting rules * revert irrelevant change * Apply auto-formatting rules * tidy imports * clarify printing the output * Apply auto-formatting rules --------- Co-authored-by: Buildkite on behalf of Wellcome Collection Co-authored-by: Robert Kenny Co-authored-by: Robert Kenny --- scripts/README.md | 8 +++++ scripts/miro_updates.py | 69 ++++++++++++++++++++++++++++++++------ scripts/suppress_miro.py | 16 +++++---- scripts/unsuppress_miro.py | 59 ++++++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 18 deletions(-) create mode 100644 scripts/unsuppress_miro.py diff --git a/scripts/README.md b/scripts/README.md index da7fbf0179..477ff15214 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -7,3 +7,11 @@ Things you'll probably want to do on occasion, scripted. 
* python3 * make * [pip-tools](https://github.com/jazzband/pip-tools) + +## Tasks + +### Unsuppressing a MIRO image + +See [unsuppress_miro.py](unsuppress_miro.py) + + diff --git a/scripts/miro_updates.py b/scripts/miro_updates.py index 2132018c6c..e6a4e8a180 100644 --- a/scripts/miro_updates.py +++ b/scripts/miro_updates.py @@ -8,6 +8,7 @@ import itertools import json import sys +import re import boto3 import httpx @@ -90,9 +91,17 @@ def _get_reindexer_topic_arn(): return outputs["topic_arn"]["value"] +def has_subscriptions(sns_client, *, topic_arn): + """ + Returns True if a topic ARN has any subscriptions (e.g. an SQS queue), False otherwise. + """ + resp = sns_client.list_subscriptions_by_topic(TopicArn=topic_arn) + + return bool(resp["Subscriptions"]) + + def _request_reindex_for(miro_id): sns_client = SESSION.client("sns") - message = { "jobConfigId": "miro--catalogue_miro_updates", "parameters": {"ids": [miro_id], "type": "SpecificReindexParameters"}, @@ -153,16 +162,6 @@ def _set_image_availability(*, miro_id, message: str, is_available: bool): _request_reindex_for(miro_id) -def make_image_available(*, miro_id, message: str): - """ - Make a Miro image available on wellcomecollection.org. 
- """ - _set_image_availability(miro_id=miro_id, message=message, is_available=True) - print( - f"Warning: you need to register {miro_id} with DLCS separately", file=sys.stderr - ) - - def _remove_image_from_elasticsearch(*, miro_id): search_templates_url = ( "https://api.wellcomecollection.org/catalogue/v2/search-templates.json" @@ -264,6 +263,25 @@ def suppress_image(*, miro_id, message: str): _remove_image_from_cloudfront(miro_id=miro_id) +def unsuppress_image(*, miro_id: str, origin: str, message: str): + """ + Reinstate a hidden Miro image + """ + sns_client = SESSION.client("sns") + topic_arn = _get_reindexer_topic_arn() + if not has_subscriptions(sns_client, topic_arn=topic_arn): + print( + "Nothing is listening to the reindexer, this action will not have the expected effect, aborting" + ) + exit(1) + + # First, make the DDS record reflect that the image should be visible + _set_image_availability(miro_id=miro_id, message=message, is_available=True) + + # Now the actual image must be registered on DLCS so that it can be seen + register_on_dlcs(origin_url=origin, miro_id=miro_id) + + def _set_overrides(*, miro_id, message: str, override_key: str, override_value: str): item = DYNAMO_CLIENT.get_item(TableName=TABLE_NAME, Key={"id": miro_id})["Item"] @@ -461,3 +479,32 @@ def update_miro_image_suppressions_doc(): print( f"*** To approve these changes, visit https://github.com/wellcomecollection/private/pull/{new_pr_number}" ) + + +def register_on_dlcs(origin_url, miro_id): + dlcs_response = dlcs_api_client().post( + f"https://api.dlcs.io/customers/2/queue/priority", + json={ + "@type": "Collection", + "member": [ + { + "space": "8", + "origin": origin_url, + "id": miro_id, + "mediaType": "image/jpeg", + } + ], + }, + ) + # DLCS will process the above request asynchronously and it may take considerable time. + # This is particularly true if it is already busy with something else. 
+ # The response contains details that will allow you to interrogate DLCS to + # find out whether it has processed (or failed to process - e.g. there's a typo in your origin_url) your request. + print(dlcs_response.text) + + +RE_MIRO_ID = re.compile("^[A-Z][0-9]{7}[A-Z]{0,4}[0-9]{0,2}$") + + +def is_valid_miro_id(maybe_miro_id: str): + return RE_MIRO_ID.fullmatch(maybe_miro_id) diff --git a/scripts/suppress_miro.py b/scripts/suppress_miro.py index a07419410f..662cd7d596 100755 --- a/scripts/suppress_miro.py +++ b/scripts/suppress_miro.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 import click -import re import httpx import sys -from miro_updates import suppress_image, update_miro_image_suppressions_doc - -miro_id_regex = re.compile("^[A-Z][0-9]{7}[A-Z]{0,4}[0-9]{0,2}$") +from miro_updates import ( + suppress_image, + update_miro_image_suppressions_doc, + is_valid_miro_id, +) @click.command() @click.option( - "--id_source", + "--id-source", help="newline-separated list of MIRO ids", type=click.File("r"), default=sys.stdin, @@ -40,8 +41,9 @@ def suppress_miro(id_source, message, dry_run): def valid_ids(id_source): for single_id in id_source: - if miro_id_regex.search(single_id): - yield single_id.strip() + single_id = single_id.strip() + if is_valid_miro_id(single_id): + yield single_id else: catalogue_response = httpx.get( f"https://api.wellcomecollection.org/catalogue/v2/works/{single_id}?include=identifiers" diff --git a/scripts/unsuppress_miro.py b/scripts/unsuppress_miro.py new file mode 100644 index 0000000000..cd92aee592 --- /dev/null +++ b/scripts/unsuppress_miro.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +import click + +from miro_updates import unsuppress_image, is_valid_miro_id + + +@click.command() +@click.argument("id") +@click.option( + "--origin", + help="URL of the image", + required=True, +) +@click.option( + "--message", + help="Why the image was reinstated, a link to a Slack message, etc.", + required=True, +) +def unsuppress_miro(id, origin, 
message): + """ + Reinstates a previously suppressed Miro image with a given ID and origin + + ID is a MIRO identifier + origin is the URL of the image it corresponds to. + + Prerequisites: + - You have a MIRO id you wish to reinstate. + - Find the image in the Storage Service bucket + - Configure the pipeline to listen to the reindexer + + Usage: + - provide the MIRO id as the ID argument + - provide the https://s3... URL for the image as --origin + - give a reason or link in --message + + thus: + + python unsuppress_miro.py L0099099 --origin https://s3-.../L0099099.JP2 --message "because I say so" + + This may fail with a message + "Delivery channels are required when updating an existing Asset via PUT" + + This indicates that the image in question is already on DLCS (though it may be in an error state). + If you are confident that it is not working, and you wish it to be, suppress it + (specifically, this is in order to remove it from DLCS) and try again. + """ + id = id.strip() + if is_valid_miro_id(id): + miro_id = id + else: + raise click.ClickException( + f"{id} doesn't look like a Miro ID and isn't the identifier of a catalogue record containing a Miro ID" + ) + + unsuppress_image(miro_id=miro_id, origin=origin, message=message) + + +if __name__ == "__main__": + unsuppress_miro()