def title_instrument(value, context, extras=None, label=""):
    """
    Check that the instrument named in a title is in the controlled vocabulary.

    The instrument name is taken to be everything in ``value`` before the
    first underscore (the whole string when there is no underscore). It is
    then looked up in whatever is stored under the ``ncas_instrument`` key of
    the AMF instrument vocabulary JSON file.

    Args:
        value: Title string to check.
        context: Not used by this check.
        extras: Not used by this check.
        label: Prefix placed at the start of the warning message.

    Returns:
        An empty list when the instrument is found, otherwise a list holding
        a single warning message.

    Raises:
        OSError: If the vocabulary file cannot be opened (for example
            ``FileNotFoundError`` when the working directory is not the
            repository root).
        json.JSONDecodeError: If the vocabulary file is not valid JSON.
        KeyError: If the vocabulary has no ``ncas_instrument`` entry.
    """
    # Imported locally so the function does not rely on a module-level import.
    import json

    instrument = value.partition("_")[0]

    # NOTE: this path is relative to the current working directory, so the
    # check only works when run from the repository root.
    vocab_path = './checksit/vocabs/AMF_CVs/2.0.0/AMF_ncas_instrument.json'

    # The context manager closes the file even if parsing or the key lookup
    # fails; JSON is read as UTF-8 rather than the platform's locale encoding.
    with open(vocab_path, "r", encoding="utf-8") as vocab_file:
        # Presumably a mapping keyed by instrument id -- confirm against the
        # vocabulary file; for a dict, ``in`` tests the keys.
        known_instruments = json.load(vocab_file)['ncas_instrument']

    if instrument in known_instruments:
        return []

    return [
        f"{label} '{value}' should be contained in the instrument controlled vocabulary list"
    ]
a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml index 11a7439e..19c1a617 100644 --- a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml +++ b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml @@ -15,7 +15,7 @@ required-global-attrs: XMP-xmpRights:WebStatement: regex-warning:http://www.nationalarchives.gov.uk/doc/open-government-licence/ XMP-photoshop:Credit: regex-warning:National\sCentre\sfor\sAtmospheric\sScience\s\(NCAS\) # Source of the image - XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check + XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check, rule-func-warning:title_instrument # Instrument Scientist’s Details XMP-dc:Creator: rule-func-warning:list_of_names XMP-iptcCore:CreatorWorkEmail: regex-rule:work-email