Skip to content

Commit

Permalink
#35 instrument controlled vocab check
Browse files Browse the repository at this point in the history
  • Loading branch information
Shanrahan16 committed Sep 1, 2023
1 parent 3d83f7e commit f7cc17a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
37 changes: 37 additions & 0 deletions checksit/rules/rule_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
import re
from datetime import datetime
import requests
import json
import pandas as pd

from . import processors
from ..config import get_config
from pandas import json_normalize

conf = get_config()
rule_splitter = conf["settings"].get("rule_splitter", "|")
Expand Down Expand Up @@ -187,6 +190,40 @@ def title_check(value, context, extras=None, label=""):
return errors


def title_instrument(value, context, extras=None, label=""):
    """
    A function to check if the instrument in the title is contained in the
    controlled vocabulary list.

    The instrument is taken to be everything in `value` before the first
    underscore (the whole of `value` when it contains no underscore). It is
    looked up in the "ncas_instrument" entry of the AMF controlled vocabulary
    JSON file.

    NOTE(review): the membership test is made directly against
    data["ncas_instrument"]; presumably that entry is a mapping keyed by
    instrument ID, so IDs rather than descriptions are matched - confirm
    against the vocabulary file.

    Parameters:
        value:   the title string to check.
        context: not used by this check.
        extras:  not used by this check.
        label:   text placed at the start of the warning message.

    Returns:
        A list of warning messages; empty when the instrument is found.

    Raises:
        OSError (e.g. FileNotFoundError) if the vocabulary file cannot be
        opened, json.JSONDecodeError if it is not valid JSON, and KeyError if
        it has no "ncas_instrument" entry.
    """
    # NOTE: this path is relative to the current working directory, so the
    # check only works when run from the repository root.
    vocab_path = './checksit/vocabs/AMF_CVs/2.0.0/AMF_ncas_instrument.json'

    instrument = value.partition("_")[0]

    # The context manager closes the file even if parsing raises.
    with open(vocab_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    warnings = []

    if instrument not in data['ncas_instrument']:
        warnings.append(f"{label} '{value}' should be contained in the instrument controlled vocabulary list")

    return warnings


def url_checker(value, context, extras=None, label=""):
"""
A function to check if the url exists
Expand Down
2 changes: 1 addition & 1 deletion specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ required-global-attrs:
XMP-xmpRights:WebStatement: regex-warning:http://www.nationalarchives.gov.uk/doc/open-government-licence/
XMP-photoshop:Credit: regex-warning:National\sCentre\sfor\sAtmospheric\sScience\s\(NCAS\)
# Source of the image
XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check
XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check, rule-func-warning:title_instrument
# Instrument Scientist’s Details
XMP-dc:Creator: rule-func-warning:list_of_names
XMP-iptcCore:CreatorWorkEmail: regex-rule:work-email
Expand Down

1 comment on commit f7cc17a

@Shanrahan16
Copy link
Collaborator Author

@Shanrahan16 Shanrahan16 commented on f7cc17a Sep 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to decide whether we're checking against the instrument ID (as the filenames use currently) or the instrument description (as in make_specs.py).

I was unable to get it to check against the description, so it checks against the key(?) which is the instrument ID anyway.

Please sign in to comment.