From 3d83f7e5ca6986602c3a27a1f78374946358d17d Mon Sep 17 00:00:00 2001 From: Shanrahan16 Date: Thu, 31 Aug 2023 13:39:05 +0100 Subject: [PATCH 1/4] #35 data_product in title must be 'plot'/'photo' --- specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml index c4f6ef7b..11a7439e 100644 --- a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml +++ b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml @@ -15,7 +15,7 @@ required-global-attrs: XMP-xmpRights:WebStatement: regex-warning:http://www.nationalarchives.gov.uk/doc/open-government-licence/ XMP-photoshop:Credit: regex-warning:National\sCentre\sfor\sAtmospheric\sScience\s\(NCAS\) # Source of the image - XMP-dc:Title: regex-rule:title, regex-rule-warning:title-data-product, rule-func:title_check + XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check # Instrument Scientist’s Details XMP-dc:Creator: rule-func-warning:list_of_names XMP-iptcCore:CreatorWorkEmail: regex-rule:work-email From f7cc17a44663c3cae7dded268fb9ffb0f0a053e8 Mon Sep 17 00:00:00 2001 From: Shanrahan16 Date: Fri, 1 Sep 2023 15:01:06 +0100 Subject: [PATCH 2/4] #35 instrument controlled vocab check --- checksit/rules/rule_funcs.py | 37 +++++++++++++++++++ .../amof-image-global-attrs.yml | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index bfd090e5..348b0203 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -2,9 +2,12 @@ import re from datetime import datetime import requests +import json +import pandas as pd from . import processors from ..config import get_config +from pandas import json_normalize conf = get_config() rule_splitter = conf["settings"].get("rule_splitter", "|") @@ -187,6 +190,40 @@ def title_check(value, context, extras=None, label=""): return errors +def title_instrument(value, context, extras=None, label=""): + """ + A function to check if the instrument in the title is contained in the controlled vocabulary list + """ + warnings = [] + + instrument = value.partition("_")[0] + + # open JSON controlled vocab file: + f = open ('./checksit/vocabs/AMF_CVs/2.0.0/AMF_ncas_instrument.json', "r") + + ## Reading from file: + data = json.loads(f.read()) + + #print(type(data)) #delete + #descriptn = data.description.map(item,item.description) + + #d = json.loads('./checksit/vocabs/AMF_CVs/2.0.0/AMF_ncas_instrument.json') + + #df = pd.json_normalize(data) + #print(df) #delete + #print(type(df)) #delete + + #if instrument not in data['ncas_instrument'][*]['description']: + #if instrument not in descriptn: + if instrument not in data['ncas_instrument']: + warnings.append(f"{label} '{value}' should be contained in the instrument controlled vocabulary list") + + # Closing file + f.close() + + return warnings + + def url_checker(value, context, extras=None, label=""): """ A function to check if the url exists diff --git a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml index 11a7439e..19c1a617 100644 --- a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml +++ b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml @@ -15,7 +15,7 @@ required-global-attrs: XMP-xmpRights:WebStatement: regex-warning:http://www.nationalarchives.gov.uk/doc/open-government-licence/ XMP-photoshop:Credit: regex-warning:National\sCentre\sfor\sAtmospheric\sScience\s\(NCAS\) # Source of the image - XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check + XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check, rule-func-warning:title_instrument # Instrument Scientist’s Details XMP-dc:Creator: rule-func-warning:list_of_names XMP-iptcCore:CreatorWorkEmail: regex-rule:work-email From 6f98d3f43f6c007f27bb21b9f3b5ef34e5655643 Mon Sep 17 00:00:00 2001 From: Shanrahan16 Date: Fri, 1 Sep 2023 15:26:40 +0100 Subject: [PATCH 3/4] #35 platform controlled vocab check --- checksit/rules/rule_funcs.py | 23 ++++++++++++++++++- .../amof-image-global-attrs.yml | 2 +- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index 348b0203..86c634d4 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -216,13 +216,34 @@ def title_instrument(value, context, extras=None, label=""): #if instrument not in data['ncas_instrument'][*]['description']: #if instrument not in descriptn: if instrument not in data['ncas_instrument']: - warnings.append(f"{label} '{value}' should be contained in the instrument controlled vocabulary list") + warnings.append(f"{label} '{instrument}' should be contained in the instrument controlled vocabulary list") # Closing file f.close() return warnings +def title_platform(value, context, extras=None, label=""): + """ + A function to check if the platform in the title is contained in the controlled vocabulary list + """ + warnings = [] + + platform = value.split("_")[1] + + # open JSON controlled vocab file: + g = open ('./checksit/vocabs/AMF_CVs/2.0.0/AMF_platform.json', "r") + + ## Reading from file: + data = json.loads(g.read()) + + if platform not in data['platform']: + warnings.append(f"{label} '{platform}' should be contained in the platform controlled vocabulary list") + + # Closing file + g.close() + + return warnings def url_checker(value, context, extras=None, label=""): """ diff --git a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml index 19c1a617..defe2ec6 100644 --- a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml +++ b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml @@ -15,7 +15,7 @@ required-global-attrs: XMP-xmpRights:WebStatement: regex-warning:http://www.nationalarchives.gov.uk/doc/open-government-licence/ XMP-photoshop:Credit: regex-warning:National\sCentre\sfor\sAtmospheric\sScience\s\(NCAS\) # Source of the image - XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check, rule-func-warning:title_instrument + XMP-dc:Title: regex-rule:title-data-product, rule-func:title_check, rule-func-warning:title_instrument, rule-func-warning:title_platform # Instrument Scientist’s Details XMP-dc:Creator: rule-func-warning:list_of_names XMP-iptcCore:CreatorWorkEmail: regex-rule:work-email From 71f1937628fee7f860aaa7a4e845782ce89b4273 Mon Sep 17 00:00:00 2001 From: Shanrahan16 Date: Fri, 1 Sep 2023 15:44:33 +0100 Subject: [PATCH 4/4] #35 adding community instruments to vocab check --- checksit/rules/rule_funcs.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index 86c634d4..35a892f1 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -192,18 +192,20 @@ def title_check(value, context, extras=None, label=""): def title_instrument(value, context, extras=None, label=""): """ - A function to check if the instrument in the title is contained in the controlled vocabulary list + A function to check if the instrument in the title is contained in the controlled vocabulary lists """ warnings = [] instrument = value.partition("_")[0] - # open JSON controlled vocab file: - f = open ('./checksit/vocabs/AMF_CVs/2.0.0/AMF_ncas_instrument.json', "r") + # open JSON controlled vocab files: + n = open ('./checksit/vocabs/AMF_CVs/2.0.0/AMF_ncas_instrument.json', "r") + c = open ('./checksit/vocabs/AMF_CVs/2.0.0/AMF_community_instrument.json', "r") ## Reading from file: - data = json.loads(f.read()) - + ncas_data = json.loads(n.read()) + community_data = json.loads(c.read()) + #print(type(data)) #delete #descriptn = data.description.map(item,item.description) @@ -215,11 +217,12 @@ def title_instrument(value, context, extras=None, label=""): #if instrument not in data['ncas_instrument'][*]['description']: #if instrument not in descriptn: - if instrument not in data['ncas_instrument']: - warnings.append(f"{label} '{instrument}' should be contained in the instrument controlled vocabulary list") + if instrument not in ncas_data['ncas_instrument'] and instrument not in community_data['community_instrument']: + warnings.append(f"{label} '{instrument}' should be contained one of the instrument controlled vocabulary lists") # Closing file - f.close() + n.close() + c.close() return warnings