Skip to content

Commit

Permalink
Add script to revert Miro suppressions (#2692)
Browse files Browse the repository at this point in the history
* first draft of unsuppression

* add image unsuppression

* Apply auto-formatting rules

* Add pipeline 2024-08-15 (#2688)

* Loop suppress miro (#2689)

* suppress miro images in a loop

* Apply auto-formatting rules

* add dry run option

* Apply auto-formatting rules

---------

Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]>

* New ArchivesDigital format, transformed from CALM Material type (#2687)

* new ArchivesDigital format, transformed from CALM Material type

* Apply auto-formatting rules

* update label to Archives - Digital

* update works ingestor tests

---------

Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]>

* add 2024-08-15/pipeline_config, missed in previous commit [ci skip]

* Turn the EBSCO adapter schedule back on (#2690)

* tidy and dry

* tidy and dry

* Apply auto-formatting rules

* revert irrelevant change

* Apply auto-formatting rules

* tidy imports

* clarify printing the output

* Apply auto-formatting rules

---------

Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]>
Co-authored-by: Robert Kenny <[email protected]>
Co-authored-by: Robert Kenny <[email protected]>
  • Loading branch information
4 people authored Aug 21, 2024
1 parent f066c6c commit 389a758
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 18 deletions.
8 changes: 8 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,11 @@ Things you'll probably want to do on occasion, scripted.
* python3
* make
* [pip-tools](https://github.com/jazzband/pip-tools)

## Tasks

### Unsuppressing a MIRO image

See [unsuppress_miro.py](unsuppress_miro.py)


69 changes: 58 additions & 11 deletions scripts/miro_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import itertools
import json
import sys
import re

import boto3
import httpx
Expand Down Expand Up @@ -90,9 +91,17 @@ def _get_reindexer_topic_arn():
return outputs["topic_arn"]["value"]


def has_subscriptions(sns_client, *, topic_arn):
    """
    Report whether anything is subscribed to an SNS topic.

    Returns True when the topic ARN has at least one subscription
    (e.g. an SQS queue), and False when it has none.
    """
    response = sns_client.list_subscriptions_by_topic(TopicArn=topic_arn)
    subscriptions = response["Subscriptions"]

    if subscriptions:
        return True
    return False


def _request_reindex_for(miro_id):
sns_client = SESSION.client("sns")

message = {
"jobConfigId": "miro--catalogue_miro_updates",
"parameters": {"ids": [miro_id], "type": "SpecificReindexParameters"},
Expand Down Expand Up @@ -153,16 +162,6 @@ def _set_image_availability(*, miro_id, message: str, is_available: bool):
_request_reindex_for(miro_id)


def make_image_available(*, miro_id, message: str):
    """
    Mark a Miro image as available on wellcomecollection.org.

    Only the availability flag is updated here; a warning is written to
    stderr as a reminder that DLCS registration is a separate step.
    """
    _set_image_availability(miro_id=miro_id, message=message, is_available=True)

    reminder = f"Warning: you need to register {miro_id} with DLCS separately"
    print(reminder, file=sys.stderr)


def _remove_image_from_elasticsearch(*, miro_id):
search_templates_url = (
"https://api.wellcomecollection.org/catalogue/v2/search-templates.json"
Expand Down Expand Up @@ -264,6 +263,25 @@ def suppress_image(*, miro_id, message: str):
_remove_image_from_cloudfront(miro_id=miro_id)


def unsuppress_image(*, miro_id: str, origin: str, message: str):
    """
    Reinstate a hidden Miro image.

    Marks the image as available again and then registers it on DLCS
    using ``origin`` as the location DLCS should fetch it from.

    :param miro_id: the Miro identifier of the image to reinstate.
    :param origin: URL of the image file, passed to DLCS as its origin.
    :param message: why the image is being reinstated.
    :raises SystemExit: with status 1 if the reindexer topic has no
        subscriptions; nothing is modified in that case.
    """
    sns_client = SESSION.client("sns")
    topic_arn = _get_reindexer_topic_arn()

    # Check before changing anything: with nothing subscribed to the
    # reindexer topic, the change would not have the expected effect.
    if not has_subscriptions(sns_client, topic_arn=topic_arn):
        print(
            "Nothing is listening to the reindexer, this action will not have the expected effect, aborting",
            file=sys.stderr,
        )
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive sessions and is not guaranteed to exist.
        sys.exit(1)

    # First, make the DDS record reflect that the image should be visible
    _set_image_availability(miro_id=miro_id, message=message, is_available=True)

    # Now the actual image must be registered on DLCS so that it can be seen
    register_on_dlcs(origin_url=origin, miro_id=miro_id)


def _set_overrides(*, miro_id, message: str, override_key: str, override_value: str):
item = DYNAMO_CLIENT.get_item(TableName=TABLE_NAME, Key={"id": miro_id})["Item"]

Expand Down Expand Up @@ -461,3 +479,32 @@ def update_miro_image_suppressions_doc():
print(
f"*** To approve these changes, visit https://github.com/wellcomecollection/private/pull/{new_pr_number}"
)


def register_on_dlcs(origin_url, miro_id):
    """
    Ask DLCS to ingest a single Miro image.

    Posts a one-member collection to the DLCS priority queue for
    customer 2, placing the image in space 8 with a JPEG media type,
    and prints the raw response body.

    :param origin_url: URL DLCS should fetch the image from.
    :param miro_id: identifier to give the image on DLCS.
    """
    dlcs_response = dlcs_api_client().post(
        "https://api.dlcs.io/customers/2/queue/priority",
        json={
            "@type": "Collection",
            "member": [
                {
                    "space": "8",
                    "origin": origin_url,
                    "id": miro_id,
                    "mediaType": "image/jpeg",
                }
            ],
        },
    )
    # DLCS will process the above request asynchronously and it may take considerable time.
    # This is particularly true if it is already busy with something else.
    # The response contains details that will allow you to interrogate DLCS to
    # find out whether it has processed (or failed to process - e.g. there's a typo in your origin_url) your request.
    print(dlcs_response.text)


# One capital letter, seven digits, then up to four capital letters
# followed by up to two digits, e.g. "L0099099".
RE_MIRO_ID = re.compile(r"^[A-Z][0-9]{7}[A-Z]{0,4}[0-9]{0,2}$")


def is_valid_miro_id(maybe_miro_id: str) -> bool:
    """
    Return True if the whole of ``maybe_miro_id`` has the shape of a Miro ID.

    The string must match exactly: surrounding whitespace (including a
    trailing newline) makes it invalid, so callers should strip first.
    """
    return RE_MIRO_ID.fullmatch(maybe_miro_id) is not None
16 changes: 9 additions & 7 deletions scripts/suppress_miro.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#!/usr/bin/env python3
import click
import re
import httpx
import sys

from miro_updates import suppress_image, update_miro_image_suppressions_doc

miro_id_regex = re.compile("^[A-Z][0-9]{7}[A-Z]{0,4}[0-9]{0,2}$")
from miro_updates import (
suppress_image,
update_miro_image_suppressions_doc,
is_valid_miro_id,
)


@click.command()
@click.option(
"--id_source",
"--id-source",
help="newline-separated list of MIRO ids",
type=click.File("r"),
default=sys.stdin,
Expand Down Expand Up @@ -40,8 +41,9 @@ def suppress_miro(id_source, message, dry_run):

def valid_ids(id_source):
for single_id in id_source:
if miro_id_regex.search(single_id):
yield single_id.strip()
single_id = single_id.strip()
if is_valid_miro_id(single_id):
yield single_id
else:
catalogue_response = httpx.get(
f"https://api.wellcomecollection.org/catalogue/v2/works/{single_id}?include=identifiers"
Expand Down
59 changes: 59 additions & 0 deletions scripts/unsuppress_miro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
import click

from miro_updates import unsuppress_image, is_valid_miro_id


@click.command()
@click.argument("id")
@click.option(
    "--origin",
    help="URL of the image",
    required=True,
)
@click.option(
    "--message",
    help="Why the image was reinstated, a link to a Slack message, etc.",
    required=True,
)
def unsuppress_miro(id, origin, message):
    """
    Reinstates a previously suppressed Miro image with a given ID and origin.

    ID is a MIRO identifier; origin is the URL of the image it corresponds to.

    \b
    Prerequisites:
    - You have a MIRO id you wish to reinstate.
    - Find the image in the Storage Service bucket
    - Configure the pipeline to listen to the reindexer

    \b
    Usage:
    - provide the MIRO id as the ID argument
    - provide the https://s3... URL for the image as --origin
    - give a reason or link in --message

    \b
    thus:
    python unsuppress_miro.py L0099099 --origin https://s3-.../L0099099.JP2 --message "because I say so"

    This may fail with a message
    "Delivery channels are required when updating an existing Asset via PUT"
    This indicates that the image in question is already on DLCS (though it may be in an error state).
    If you are confident that it is not working, and you wish it to be, suppress it
    (specifically, this is in order to remove it from DLCS) and try again.
    """
    # The parameter is called `id` because click derives it from the
    # positional argument declared above.
    miro_id = id.strip()

    # This script does not look IDs up in the catalogue, so anything that
    # is not shaped like a Miro ID is rejected outright.
    if not is_valid_miro_id(miro_id):
        raise click.ClickException(f"{miro_id} doesn't look like a Miro ID")

    unsuppress_image(miro_id=miro_id, origin=origin, message=message)


if __name__ == "__main__":
    unsuppress_miro()

0 comments on commit 389a758

Please sign in to comment.