From d4861ebe2e47ea0b90909096f581f8b37d94771a Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 20 Aug 2024 17:34:05 +0100
Subject: [PATCH 1/5] Changes due to clarification of standard

---
 checksit/generic.py                           | 53 ++++++++++++++-----
 checksit/rules/rule_funcs.py                  | 45 ++++++++++++++++
 .../ncas-radar-1.0.0/coordinate-variables.yml |  2 +-
 .../groups/ncas-radar-1.0.0/global-attrs.yml  |  8 +--
 .../sensor-pointing-variables.yml             |  6 ++-
 5 files changed, 95 insertions(+), 19 deletions(-)

diff --git a/checksit/generic.py b/checksit/generic.py
index 3742769..7897ff9 100644
--- a/checksit/generic.py
+++ b/checksit/generic.py
@@ -319,10 +319,22 @@ def check_var(dct, variable, defined_attrs, rules_attrs=None, skip_spellcheck=Fa
                 attr_key = attr.split(":")[0]
                 attr_rule = ":".join(attr.split(":")[1:])
                 if attr_key not in dct["variables"][variable]:
-                    errors.append(
-                        f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. "
-                        f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}"
-                    )
+                    if not (
+                        attr_key == "standard_name" and attr_rule.split(":")[1] == "allow-proposed"
+                    ):
+                        errors.append(
+                            f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. "
+                            f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}"
+                        )
+                    else:
+                        rule_errors, rule_warnings = rules.check(
+                            attr_rule,
+                            dct["variables"][variable].get(attr_key),
+                            context=dct["variables"][variable].get("proposed_standard_name"),
+                            label=f"[variables:******:{variable}]***",
+                        )
+                        errors.extend(rule_errors)
+                        warnings.extend(rule_warnings)
                 elif is_undefined(dct["variables"][variable].get(attr_key)):
                     errors.append(
                         f"[variable:**************:{variable}]: No value defined for attribute '{attr_key}'."
@@ -386,14 +398,31 @@ def check_var(dct, variable, defined_attrs, rules_attrs=None, skip_spellcheck=Fa
                 attr_key = attr.split(":")[0]
                 attr_rule = ":".join(attr.split(":")[1:])
                 if attr_key not in dct["variables"][variable]:
-                    errors.append(
-                        f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. "
-                        f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}"
-                    )
-                elif is_undefined(dct["variables"][variable].get(attr_key)):
-                    errors.append(
-                        f"[variable:**************:{variable}]: No value defined for attribute '{attr_key}'."
-                    )
+                    if not (
+                        attr_key == "standard_name" and attr_rule.split(":")[1] == "allow-proposed"
+                    ):
+                        errors.append(
+                            f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. "
+                            f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}"
+                        )
+                    else:
+                        rule_errors, rule_warnings = rules.check(
+                            attr_rule,
+                            dct["variables"][variable].get(attr_key),
+                            context=dct["variables"][variable].get("proposed_standard_name"),
+                            label=f"[variables:******:{variable}]***",
+                        )
+                        errors.extend(rule_errors)
+                        warnings.extend(rule_warnings)
+                #if attr_key not in dct["variables"][variable]:
+                #    errors.append(
+                #        f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. "
+                #        f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}"
+                #    )
+                #elif is_undefined(dct["variables"][variable].get(attr_key)):
+                #    errors.append(
+                #        f"[variable:**************:{variable}]: No value defined for attribute '{attr_key}'."
+                #    )
                 elif attr_rule.startswith("rule-func:same-type-as"):
                     var_checking_against = attr_rule.split(":")[-1]
                     rule_errors, rule_warnings = rules.check(
diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py
index 118fc67..cf714cd 100644
--- a/checksit/rules/rule_funcs.py
+++ b/checksit/rules/rule_funcs.py
@@ -4,6 +4,7 @@
 import requests
 from urllib.request import urlopen
 import numpy as np
+import sys
 
 from . import processors
 from ..config import get_config
@@ -384,3 +385,47 @@ def check_qc_flags(value, context, extras=None, label=""):
         )
 
     return errors
+
+
+def check_utc_date_iso_format(value, context, extras=None, label=""):
+    """
+    Check date given is in ISO 8601 format and in UTC
+    value - date string; error messages quote it exactly as supplied
+    context, extras - unused; returns a list of error strings (empty if valid)
+    """
+    errors = []
+
+    parsed = value  # normalised copy used for parsing only
+    if sys.version_info < (3, 11): # python datetime changed its recognition of ISO format from 3.11 onward
+        if value.endswith("Z"):
+            parsed = f"{value[:-1]}+00:00"  # swap only the trailing designator
+        elif re.fullmatch(r"(\+|-)\d{4}", value[-5:]):
+            parsed = f"{value[:-2]}:{value[-2:]}"
+    try:
+        offset = datetime.fromisoformat(parsed).utcoffset()
+        if offset is not None and offset.total_seconds() != 0:  # no offset at all is accepted
+            errors.append(f"{label} Date string '{value}' not in UTC.")
+    except ValueError:
+        errors.append(f"{label} Date string '{value}' not in ISO 8601 format.")
+
+    return errors
+
+
+def allow_proposed(value, context, extras=None, label=""):
+    """
+    Accept either standard_name or proposed_standard_name matching a required name
+    value - value of the standard_name attribute
+    context - value of the proposed_standard_name attribute
+    extras - required name; when given as a list only its first item is used
+    """
+    # Unwrap a list argument down to the single name being compared
+    expected = extras[0] if isinstance(extras, list) else extras
+
+    if value != expected and context != expected:
+        return [
+            f"{label} does not contain standard_name or proposed_standard_name with value '{expected}'"
+        ]
+
+    return []
+
+
diff --git a/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml b/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml
index 08fab02..f2e4166 100644
--- a/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml
+++ b/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml
@@ -17,10 +17,10 @@ var-requires1:
       - range
     defined_attrs:
       - type:float
-      - standard_name:projection_range_coordinate
       - axis:radial_range_coordinate
     rules_attrs:
       - dimension: rule-func:match-one-of:range|sweep, range
       - units: rule-func:match-one-of:metres|meters
       - long_name: rule-func:string-of-length:5+
       - spacing_is_constant: rule-func:match-one-of:true|false
+      - standard_name: rule-func:allow-proposed:projection_range_coordinate
diff --git a/specs/groups/ncas-radar-1.0.0/global-attrs.yml b/specs/groups/ncas-radar-1.0.0/global-attrs.yml
index 5c15635..f1935ad 100644
--- a/specs/groups/ncas-radar-1.0.0/global-attrs.yml
+++ b/specs/groups/ncas-radar-1.0.0/global-attrs.yml
@@ -31,14 +31,14 @@ required-gloabl-attrs:
         creator_email: regex-rule:valid-email
         creator_url: regex-rule:valid-url||rule-func-warning:validate-orcid-ID
         processing_level: rule-func:match-one-of:1|2|3
-        last_revised_date: regex-rule:datetime
+        last_revised_date: rule-func:check-utc-date-iso-format
         project_principal_investigator_email: regex-rule:valid-email
         project_principal_investigator_url: regex-rule:valid-url||rule-func-warning:validate-orcid-ID
         platform: rule-func:ceda-platform||rule-func-warning:ncas-platform
         deployment_mode: rule-func:match-one-of:land|sea|air
-        time_coverage_start: regex-rule:datetimeZ
-        time_coverage_end: regex-rule:datetimeZ
+        time_coverage_start: rule-func:check-utc-date-iso-format
+        time_coverage_end: rule-func:check-utc-date-iso-format
     regex_attrs:
-        Conventions: ^(NCAS-Radar-1\.0|CfRadial-1\.4) (NCAS-Radar-1\.0|CfRadial-1\.4) instrument_parameters radar_parameters radar_calibration$
+        Conventions: ^(NCAS-Radar-1\.0 CfRadial-1\.4|CfRadial-1\.4 NCAS-Radar-1\.0)( instrument_parameters| radar_parameters| lidar_parameters| radar_calibration| lidar_calibration| platform_velocity| geometry_correction)*$
         product_version: ^v\d+\.\d+\.\d+$
 
diff --git a/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml b/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml
index 3985161..fe0e5b3 100644
--- a/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml
+++ b/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml
@@ -7,9 +7,10 @@ var-requires0:
       - type:float
       - dimension:time
       - units:degrees
-      - standard_name:ray_azimuth_angle
       - long_name:azimuth_angle_from_true_north
       - axis:radial_azimuth_coordinate
+    rules_attrs:
+      - standard_name: rule-func:allow-proposed:ray_azimuth_angle
 var-requires1:
   func: checksit.generic.check_var
   params:
@@ -19,6 +20,7 @@ var-requires1:
       - type:float
       - dimension:time
       - units:degrees
-      - standard_name:ray_elevation_angle
       - long_name:elevation_angle_from_horizontal_plane
       - axis:radial_elevation_coordinate
+    rules_attrs:
+      - standard_name: rule-func:allow-proposed:ray_elevation_angle

From 98c0dcacf33178e3a292f52d3c7544a7315e6590 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 1 Oct 2024 10:34:38 +0100
Subject: [PATCH 2/5] Change the non-existent URL in tests

---
 tests/test_rules.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index f24c117..2485593 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -157,7 +157,7 @@ def test_url_checker():
     assert crf.url_checker("https://www.example.com", {}, label="Test") == []
 
     # Test that the function correctly handles an unreachable URL
-    assert crf.url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"]
+    assert crf.url_checker("fake://www.nonexistenturl.com", {}, label="Test") == ["Test 'fake://www.nonexistenturl.com' is not a reachable url"]
 
     # Test that the function correctly handles an existing but unreachable URL
     assert crf.url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"]

From fcde548a51f66ca6d65a4acc2398f79660c5f8c3 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 1 Oct 2024 11:11:12 +0100
Subject: [PATCH 3/5] Add functions to test utc_date_iso_format and
 allow_proposed rules

---
 tests/test_rules.py | 62 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 2485593..3d8d892 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -223,6 +223,68 @@ def test_ncas_platform():
     # Test function returns error for example platform
     assert crf.ncas_platform("example", {}, label='Test') == ["Test 'example' is not a valid NCAS platform"]
 
+def test_utc_date_iso_format():
+    # Test function returns no errors for an ISO formatted date in UTC, with "Z", "+0000", and "+00:00" time zone identifiers
+    assert crf.check_utc_date_iso_format("2024-11-17T01:23:45Z", {}, label="Test") == []
+    assert (
+        crf.check_utc_date_iso_format("2024-11-17T01:23:45+0000", {}, label="Test")
+        == []
+    )
+    assert (
+        crf.check_utc_date_iso_format("2024-11-17T01:23:45 +0000", {}, label="Test")
+        == []
+    )
+    assert (
+        crf.check_utc_date_iso_format("2024-11-17T01:23:45+00:00", {}, label="Test")
+        == []
+    )
+    assert (
+        crf.check_utc_date_iso_format("2024-11-17 01:23:45+00:00", {}, label="Test")
+        == []
+    )
+    assert (
+        crf.check_utc_date_iso_format(
+            "2024-11-17T01:23:45.678901+00:00", {}, label="Test"
+        )
+        == []
+    )
+    # Test function returns error for ISO formatted date NOT in UTC
+    assert crf.check_utc_date_iso_format(
+        "2024-11-17T01:23:45+0100", {}, label="Test"
+    ) == ["Test Date string '2024-11-17T01:23:45+0100' not in UTC."]
+    assert crf.check_utc_date_iso_format(
+        "2024-11-17T01:23:45-01:00", {}, label="Test"
+    ) == ["Test Date string '2024-11-17T01:23:45-01:00' not in UTC."]
+    # Test function returns error for non ISO formatted date in UTC
+    assert crf.check_utc_date_iso_format("2024/11/17T01:23:45Z", {}, label="Test") == [
+        "Test Date string '2024/11/17T01:23:45Z' not in ISO 8601 format."
+    ]
+    assert crf.check_utc_date_iso_format(
+        "20241117T01-23-45+0000", {}, label="Test"
+    ) == ["Test Date string '20241117T01-23-45+0000' not in ISO 8601 format."]
+    # Test function returns error for something that is very much not a date
+    assert crf.check_utc_date_iso_format(
+        "11th November 2024 at 23 minutes and 45 seconds past 1 in the morning",
+        {},
+        label="Test",
+    ) == [
+        "Test Date string '11th November 2024 at 23 minutes and 45 seconds past 1 in the morning' not in ISO 8601 format."
+    ]
+
+
+def test_allow_proposed():
+    # Test function returns no errors when value of "extras" matches value of "value"
+    assert crf.allow_proposed("name1", None, extras="name1", label="Test") == []
+    assert crf.allow_proposed("name1", None, extras=["name1"], label="Test") == []
+    assert crf.allow_proposed("name1", "name2", extras="name1", label="Test") == []
+    # Test function returns no errors when value of "extras" matches value of "context"
+    assert crf.allow_proposed(None, "name2", extras="name2", label="Test") == []
+    # Test function returns errors when there is no match
+    assert crf.allow_proposed("name1", "name2", extras="name3", label="Test") == [
+        "Test does not contain standard_name or proposed_standard_name with value 'name3'"
+    ]
+
+
 # rules.py
 def _test_type(_type, value):
     return r.check(f"type-rule:{_type}", value)

From 7ee77417f333843dbe68ebcd3ae03f8b1b70a929 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 1 Oct 2024 11:11:41 +0100
Subject: [PATCH 4/5] Format file with black

---
 tests/test_rules.py | 660 ++++++++++++++++++++++++++++++--------------
 1 file changed, 458 insertions(+), 202 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 3d8d892..e548183 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -6,6 +6,7 @@
 from checksit.rules import rules as r
 import checksit.rules.rule_funcs as crf
 
+
 # rule_funcs.py
 def test_match_file_name():
     file_path = "happy_netcdf"
@@ -20,136 +21,261 @@ def test_match_file_name():
 
 def test_string_of_length():
     # Test that the function correctly handles strings of the minimum length
-    assert crf.string_of_length('abc', {}, ['3'], 'Test') == []
-    assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == []
+    assert crf.string_of_length("abc", {}, ["3"], "Test") == []
+    assert crf.string_of_length("abcd", {}, ["3+"], "Test") == []
 
     # Test that the function correctly handles strings shorter than the minimum length
-    assert crf.string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"]
-    assert crf.string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"]
+    assert crf.string_of_length("ab", {}, ["3"], "Test") == [
+        "Test 'ab' must be exactly 3 characters"
+    ]
+    assert crf.string_of_length("ab", {}, ["3+"], "Test") == [
+        "Test 'ab' must be at least 3 characters"
+    ]
 
     # Test that the function correctly handles strings longer than the minimum length
-    assert crf.string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"]
-    assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == []
+    assert crf.string_of_length("abcd", {}, ["3"], "Test") == [
+        "Test 'abcd' must be exactly 3 characters"
+    ]
+    assert crf.string_of_length("abcd", {}, ["3+"], "Test") == []
 
     # Test that the function correctly handles empty strings
-    assert crf.string_of_length('', {}, ['0'], 'Test') == []
-    assert crf.string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"]
-    assert crf.string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"]
+    assert crf.string_of_length("", {}, ["0"], "Test") == []
+    assert crf.string_of_length("", {}, ["1"], "Test") == [
+        "Test '' must be exactly 1 characters"
+    ]
+    assert crf.string_of_length("", {}, ["1+"], "Test") == [
+        "Test '' must be at least 1 characters"
+    ]
 
 
 def test_match_one_of():
     # Test that the function correctly handles valid inputs
-    assert crf.match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == []
+    assert crf.match_one_of("apple", {}, ["apple|banana|orange"], "Test") == []
 
     # Test that the function correctly handles invalid inputs
-    assert crf.match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_of("kiwi", {}, ["apple|banana|orange"], "Test") == [
+        "Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_of("", {}, ["apple|banana|orange"], "Test") == [
+        "Test '' must be one of: '['apple', 'banana', 'orange']'"
+    ]
 
 
 def test_match_one_or_more_of():
     # Test that the function correctly handles valid inputs
-    assert crf.match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == []
-    assert crf.match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == []
+    assert (
+        crf.match_one_or_more_of("apple,banana", {}, ["apple|banana|orange"], "Test")
+        == []
+    )
+    assert crf.match_one_or_more_of("apple", {}, ["apple|banana|orange"], "Test") == []
 
     # Test that the function correctly handles invalid inputs
-    assert crf.match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
-    assert crf.match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_or_more_of(
+        "apple,kiwi", {}, ["apple|banana|orange"], "Test"
+    ) == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_or_more_of("kiwi", {}, ["apple|banana|orange"], "Test") == [
+        "Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_or_more_of("", {}, ["apple|banana|orange"], "Test") == [
+        "Test '' must be one or more of: '['apple', 'banana', 'orange']'"
+    ]
 
 
 def test_validate_image_date_time():
     # Test that the function correctly handles valid date-time strings
-    assert crf.validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == []
-    assert crf.validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == []
+    assert crf.validate_image_date_time("2022:01:01 12:00:00", {}, label="Test") == []
+    assert (
+        crf.validate_image_date_time("2022:01:01 12:00:00.000000", {}, label="Test")
+        == []
+    )
 
     # Test that the function correctly handles invalid date-time strings
-    assert crf.validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
-    assert crf.validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
-    assert crf.validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
-    assert crf.validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time("2022-01-01 12:00:00", {}, label="Test") == [
+        "Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"
+    ]
+    assert crf.validate_image_date_time("2022:01:01 12:00", {}, label="Test") == [
+        "Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"
+    ]
+    assert crf.validate_image_date_time("2022:01:01", {}, label="Test") == [
+        "Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"
+    ]
+    assert crf.validate_image_date_time("2022:01:01 12:00:00.00", {}, label="Test") == [
+        "Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time("", {}, label="Test") == [
+        "Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"
+    ]
 
 
 def test_validate_orcid_ID():
     # Test that the function correctly handles valid ORCID IDs
-    assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == []
-    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == []
-    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == []
+    assert (
+        crf.validate_orcid_ID("https://orcid.org/0000-0002-1825-0097", {}, label="Test")
+        == []
+    )
+    assert (
+        crf.validate_orcid_ID("https://orcid.org/1234-5678-9012-3456", {}, label="Test")
+        == []
+    )
+    assert (
+        crf.validate_orcid_ID("https://orcid.org/1234-5678-9012-345X", {}, label="Test")
+        == []
+    )
 
     # Test that the function correctly handles ORCID IDs with incorrect lengths
-    assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID(
+        "https://orcid.org/0000-0002-1825-009", {}, label="Test"
+    ) == [
+        "Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
+    assert crf.validate_orcid_ID(
+        "https://orcid.org/1234-5678-9012-34567", {}, label="Test"
+    ) == [
+        "Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
 
     # Test that the function correctly handles ORCID IDs with incorrect formats
-    assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID(
+        "https://orcid.org/0000-0002-1825-009Z", {}, label="Test"
+    ) == [
+        "Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
+    assert crf.validate_orcid_ID(
+        "https://orcid.org/1234-5678-9012-34X5", {}, label="Test"
+    ) == [
+        "Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
+    assert crf.validate_orcid_ID(
+        "https://orcid.org/1234-5678-9012-3456-", {}, label="Test"
+    ) == [
+        "Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
+    assert crf.validate_orcid_ID(
+        "https://orcid.org/1234-5678-9012-3456X", {}, label="Test"
+    ) == [
+        "Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID("", {}, label="Test") == [
+        "Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"
+    ]
 
 
 def test_list_of_names():
     # Test that the function correctly handles valid names
-    assert crf.list_of_names('Doe, John', {}, label='Test') == []
-    assert crf.list_of_names('Doe, John J.', {}, label='Test') == []
-    assert crf.list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == []
+    assert crf.list_of_names("Doe, John", {}, label="Test") == []
+    assert crf.list_of_names("Doe, John J.", {}, label="Test") == []
+    assert crf.list_of_names(["Doe, John", "Smith, Jane"], {}, label="Test") == []
 
     # Test that the function correctly handles names with incorrect formats
-    assert crf.list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
-    assert crf.list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
-    assert crf.list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+    assert crf.list_of_names("John Doe", {}, label="Test") == [
+        "Test 'John Doe' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"
+    ]
+    assert crf.list_of_names("Doe John", {}, label="Test") == [
+        "Test 'Doe John' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"
+    ]
+    assert crf.list_of_names(["Doe, John", "Jane Smith"], {}, label="Test") == [
+        "Test '['Doe, John', 'Jane Smith']' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"
+    ]
 
     # Test that the function correctly handles names with invalid characters
-    assert crf.list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"]
-    assert crf.list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"]
-    assert crf.list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert crf.list_of_names("Doe, J0hn", {}, label="Test") == [
+        "Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"
+    ]
+    assert crf.list_of_names("Doe, John!", {}, label="Test") == [
+        "Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"
+    ]
+    assert crf.list_of_names(["Doe, John", "Smith, J@ne"], {}, label="Test") == [
+        "Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.list_of_names('', {}, label='Test') == ["Test '' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"]
-    assert crf.list_of_names([], {}, label='Test') == []
+    assert crf.list_of_names("", {}, label="Test") == [
+        "Test '' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate",
+        "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate",
+    ]
+    assert crf.list_of_names([], {}, label="Test") == []
 
 
 def test_headline():
     # Test that the function correctly handles valid headlines
-    assert crf.headline('This is a valid headline.', {}, label='Test') == []
-    assert crf.headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == []
-    assert crf.headline('This headline is exactly 10 characters.', {}, label='Test') == []
+    assert crf.headline("This is a valid headline.", {}, label="Test") == []
+    assert (
+        crf.headline(
+            "This headline is exactly 150 characters long " + "a" * 105,
+            {},
+            label="Test",
+        )
+        == []
+    )
+    assert (
+        crf.headline("This headline is exactly 10 characters.", {}, label="Test") == []
+    )
 
     # Test that the function correctly handles headlines longer than 150 characters
-    assert crf.headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"]
+    assert crf.headline(
+        "This headline is longer than 150 characters." + "a" * 120, {}, label="Test"
+    ) == [
+        "Test 'This headline is longer than 150 characters."
+        + "a" * 120
+        + "' should contain no more than one sentence"
+    ]
 
     # Test that the function correctly handles headlines with more than one sentence
-    assert crf.headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"]
+    assert crf.headline(
+        "This is a headline. It has two sentences.", {}, label="Test"
+    ) == [
+        "Test 'This is a headline. It has two sentences.' should contain no more than one sentence"
+    ]
 
     # Test that the function correctly handles headlines that do not start with a capital letter
-    assert crf.headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"]
+    assert crf.headline(
+        "this headline does not start with a capital letter.", {}, label="Test"
+    ) == [
+        "Test 'this headline does not start with a capital letter.' should start with a capital letter"
+    ]
 
     # Test that the function correctly handles headlines shorter than 10 characters
-    assert crf.headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"]
+    assert crf.headline("Too short", {}, label="Test") == [
+        "Test 'Too short' should be at least 10 characters"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.headline('', {}, label='Test') == ["Test '' should not be empty"]
+    assert crf.headline("", {}, label="Test") == ["Test '' should not be empty"]
 
 
 def test_title_check():
     # Test that the function correctly handles titles that match the filename
-    assert crf.title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == []
-    assert crf.title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == []
+    assert (
+        crf.title_check("happy_netcdf", "/path/to/file/happy_netcdf", label="Test")
+        == []
+    )
+    assert (
+        crf.title_check(
+            "happy_NetCDF.nc", "/path/to/file/happy_NetCDF.nc", label="Test"
+        )
+        == []
+    )
 
     # Test that the function correctly handles titles that do not match the filename
-    assert crf.title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"]
-    assert crf.title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"]
+    assert crf.title_check(
+        "sad_netcdf", "/path/to/file/happy_netcdf", label="Test"
+    ) == ["Test 'sad_netcdf' must match the name of the file"]
+    assert crf.title_check(
+        "happy_NetCDF.nc", "/path/to/file/sad_NetCDF.nc", label="Test"
+    ) == ["Test 'happy_NetCDF.nc' must match the name of the file"]
 
     # Test that the function correctly handles empty titles
-    assert crf.title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"]
+    assert crf.title_check("", "/path/to/file/happy_netcdf", label="Test") == [
+        "Test '' must match the name of the file"
+    ]
 
 
 def test_url_checker():
@@ -157,10 +283,14 @@ def test_url_checker():
     assert crf.url_checker("https://www.example.com", {}, label="Test") == []
 
     # Test that the function correctly handles an unreachable URL
-    assert crf.url_checker("fake://www.nonexistenturl.com", {}, label="Test") == ["Test 'fake://www.nonexistenturl.com' is not a reachable url"]
+    assert crf.url_checker("fake://www.nonexistenturl.com", {}, label="Test") == [
+        "Test 'fake://www.nonexistenturl.com' is not a reachable url"
+    ]
 
     # Test that the function correctly handles an existing but unreachable URL
-    assert crf.url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"]
+    assert crf.url_checker(
+        "https://www.example.com/nonexistentpage", {}, label="Test"
+    ) == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"]
 
     # Test that the function correctly handles an empty URL
     assert crf.url_checker("", {}, label="Test") == ["Test '' is not a reachable url"]
@@ -168,60 +298,107 @@ def test_url_checker():
 
 def test_relation_url_checker():
     # Test that the function correctly handles valid inputs
-    assert crf.relation_url_checker('relation https://example.com', {}, label='Test') == []
+    assert (
+        crf.relation_url_checker("relation https://example.com", {}, label="Test") == []
+    )
 
     # Test that the function correctly handles inputs without a space
-    assert crf.relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"]
+    assert crf.relation_url_checker(
+        "relationhttps://example.com", {}, label="Test"
+    ) == ["Test 'relationhttps://example.com' should contain a space before the url"]
 
     # Test that the function correctly handles inputs with an invalid URL
-    assert crf.relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"]
+    assert crf.relation_url_checker("relation https://", {}, label="Test") == [
+        "Test 'https://' is not a reachable url"
+    ]
 
     # Test that the function correctly handles empty strings
-    assert crf.relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"]
+    assert crf.relation_url_checker("", {}, label="Test") == [
+        "Test '' should contain a space before the url"
+    ]
 
 
 def test_latitude():
     # Test that the function correctly handles valid latitudes
-    assert crf.latitude('45.1234', {}, label='Test') == []
-    assert crf.latitude('-90.0000', {}, label='Test') == []
-    assert crf.latitude('90.0000', {}, label='Test') == []
+    assert crf.latitude("45.1234", {}, label="Test") == []
+    assert crf.latitude("-90.0000", {}, label="Test") == []
+    assert crf.latitude("90.0000", {}, label="Test") == []
 
     # Test that the function correctly handles invalid latitudes
-    assert crf.latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "]
-    assert crf.latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "]
-    assert crf.latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "]
+    assert crf.latitude("90.0001", {}, label="Test") == [
+        "Test '90.0001' must be within -90 and +90 "
+    ]
+    assert crf.latitude("-90.0001", {}, label="Test") == [
+        "Test '-90.0001' must be within -90 and +90 "
+    ]
+    assert crf.latitude("100.0000", {}, label="Test") == [
+        "Test '100.0000' must be within -90 and +90 "
+    ]
 
 
 def test_longitude():
     # Test that the function correctly handles valid longitudes
-    assert crf.longitude('45.1234', {}, label='Test') == []
-    assert crf.longitude('-180.0000', {}, label='Test') == []
-    assert crf.longitude('180.0000', {}, label='Test') == []
+    assert crf.longitude("45.1234", {}, label="Test") == []
+    assert crf.longitude("-180.0000", {}, label="Test") == []
+    assert crf.longitude("180.0000", {}, label="Test") == []
 
     # Test that the function correctly handles invalid longitudes
-    assert crf.longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "]
-    assert crf.longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "]
-    assert crf.longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "]
+    assert crf.longitude("180.0001", {}, label="Test") == [
+        "Test '180.0001' must be within -180 and +180 "
+    ]
+    assert crf.longitude("-180.0001", {}, label="Test") == [
+        "Test '-180.0001' must be within -180 and +180 "
+    ]
+    assert crf.longitude("200.0000", {}, label="Test") == [
+        "Test '200.0000' must be within -180 and +180 "
+    ]
 
 
 def test_ceda_platform():
     # Test function returns no errors for all NCAS platforms
-    for plat in ["bt-tower-t35", "cao", "cao-sparsholt", "cdao", "cdao-frongoch", "cvao", "faam", "iao", "wao"]:
+    for plat in [
+        "bt-tower-t35",
+        "cao",
+        "cao-sparsholt",
+        "cdao",
+        "cdao-frongoch",
+        "cvao",
+        "faam",
+        "iao",
+        "wao",
+    ]:
         assert crf.ceda_platform(plat, {}) == []
     # Test function returns no errors for a non-NCAS platform
     assert crf.ceda_platform("netheravon", {}) == []
     # Test function returns error for example platform
-    assert crf.ceda_platform("example", {}, label='Test') == ["Test 'example' is not a valid platform in the CEDA catalogue"]
+    assert crf.ceda_platform("example", {}, label="Test") == [
+        "Test 'example' is not a valid platform in the CEDA catalogue"
+    ]
 
 
 def test_ncas_platform():
     # Test function returns no errors for all NCAS platforms
-    for plat in ["bt-tower-t35", "cao", "cao-sparsholt", "cdao", "cdao-frongoch", "cvao", "faam", "iao", "wao"]:
+    for plat in [
+        "bt-tower-t35",
+        "cao",
+        "cao-sparsholt",
+        "cdao",
+        "cdao-frongoch",
+        "cvao",
+        "faam",
+        "iao",
+        "wao",
+    ]:
         assert crf.ncas_platform(plat, {}) == []
     # Test function returns error for a non-NCAS platform
-    assert crf.ncas_platform("netheravon", {}, label='Test') == ["Test 'netheravon' is not a valid NCAS platform"]
+    assert crf.ncas_platform("netheravon", {}, label="Test") == [
+        "Test 'netheravon' is not a valid NCAS platform"
+    ]
     # Test function returns error for example platform
-    assert crf.ncas_platform("example", {}, label='Test') == ["Test 'example' is not a valid NCAS platform"]
+    assert crf.ncas_platform("example", {}, label="Test") == [
+        "Test 'example' is not a valid NCAS platform"
+    ]
+
 
 def test_utc_date_iso_format():
     # Test function returns no errors for an ISO formatted date in UTC, with "Z", "+0000", and "+00:00" time zone identifiers
@@ -289,13 +466,14 @@ def test_allow_proposed():
 def _test_type(_type, value):
     return r.check(f"type-rule:{_type}", value)
 
+
 def test_type_rules():
     tt = _test_type
 
     _type = "number"
     for value in 3.4, -4:
         assert tt(_type, value) == ([], [])
-        print('sarah test', tt(_type, value))
+        print("sarah test", tt(_type, value))
 
     for value in "3", "3.4", ["hi"]:
         assert tt(_type, value) != ([], [])
@@ -321,189 +499,264 @@ def test_type_rules():
     for value in 3, 4.5, ["hi"]:
         assert tt(_type, value) != ([], [])
 
+
 # static regex rule tests
 @pytest.fixture
 def rules():
     return r.static_regex_rules
 
+
 def test_integer_rule(rules):
-    assert re.fullmatch(rules['integer']['regex-rule'], '123')
-    assert re.fullmatch(rules['integer']['regex-rule'], '-123')
-    assert not re.fullmatch(rules['integer']['regex-rule'], '123.45')
-    assert not re.fullmatch(rules['integer']['regex-rule'], 'abc')
-    assert not re.fullmatch(rules['integer']['regex-rule'], '')
+    assert re.fullmatch(rules["integer"]["regex-rule"], "123")
+    assert re.fullmatch(rules["integer"]["regex-rule"], "-123")
+    assert not re.fullmatch(rules["integer"]["regex-rule"], "123.45")
+    assert not re.fullmatch(rules["integer"]["regex-rule"], "abc")
+    assert not re.fullmatch(rules["integer"]["regex-rule"], "")
+
 
 def test_valid_email_rule(rules):
-    assert re.fullmatch(rules['valid-email']['regex-rule'], 'test@example.com')
-    assert re.fullmatch(rules['valid-email']['regex-rule'], 'test.test@example.com')
-    assert not re.fullmatch(rules['valid-email']['regex-rule'], 'test@example')
-    assert not re.fullmatch(rules['valid-email']['regex-rule'], 'test@.com')
-    assert not re.fullmatch(rules['valid-email']['regex-rule'], 'test@com')
+    assert re.fullmatch(rules["valid-email"]["regex-rule"], "test@example.com")
+    assert re.fullmatch(rules["valid-email"]["regex-rule"], "test.test@example.com")
+    assert not re.fullmatch(rules["valid-email"]["regex-rule"], "test@example")
+    assert not re.fullmatch(rules["valid-email"]["regex-rule"], "test@.com")
+    assert not re.fullmatch(rules["valid-email"]["regex-rule"], "test@com")
+
 
 def test_valid_url_rule(rules):
-    assert re.fullmatch(rules['valid-url']['regex-rule'], 'https://example.com')
-    assert re.fullmatch(rules['valid-url']['regex-rule'], 'http://example.com')
-    assert not re.fullmatch(rules['valid-url']['regex-rule'], 'htp://example.com')
-    assert not re.fullmatch(rules['valid-url']['regex-rule'], 'https:/example.com')
-    assert not re.fullmatch(rules['valid-url']['regex-rule'], 'https://example')
+    assert re.fullmatch(rules["valid-url"]["regex-rule"], "https://example.com")
+    assert re.fullmatch(rules["valid-url"]["regex-rule"], "http://example.com")
+    assert not re.fullmatch(rules["valid-url"]["regex-rule"], "htp://example.com")
+    assert not re.fullmatch(rules["valid-url"]["regex-rule"], "https:/example.com")
+    assert not re.fullmatch(rules["valid-url"]["regex-rule"], "https://example")
+
 
 def test_valid_url_or_na_rule(rules):
-    assert re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'https://example.com')
-    assert re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'http://example.com')
-    assert re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'N/A')
-    assert not re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'htp://example.com')
-    assert not re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'https:/example.com')
-    assert not re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'nan')
+    assert re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "https://example.com")
+    assert re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "http://example.com")
+    assert re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "N/A")
+    assert not re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "htp://example.com")
+    assert not re.fullmatch(
+        rules["valid-url-or-na"]["regex-rule"], "https:/example.com"
+    )
+    assert not re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "nan")
+
 
 def test_match_vN_M_rule(rules):
-    assert re.fullmatch(rules['match:vN.M']['regex-rule'], 'v1.0')
-    assert re.fullmatch(rules['match:vN.M']['regex-rule'], 'v2.1')
-    assert not re.fullmatch(rules['match:vN.M']['regex-rule'], 'v10')
-    assert not re.fullmatch(rules['match:vN.M']['regex-rule'], 'v1.01')
-    assert not re.fullmatch(rules['match:vN.M']['regex-rule'], 'v.1.0')
+    assert re.fullmatch(rules["match:vN.M"]["regex-rule"], "v1.0")
+    assert re.fullmatch(rules["match:vN.M"]["regex-rule"], "v2.1")
+    assert not re.fullmatch(rules["match:vN.M"]["regex-rule"], "v10")
+    assert not re.fullmatch(rules["match:vN.M"]["regex-rule"], "v1.01")
+    assert not re.fullmatch(rules["match:vN.M"]["regex-rule"], "v.1.0")
+
 
 def test_datetime_rule(rules):
-    assert re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01T00:00:00')
-    assert re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01T00:00:00.123')
-    assert not re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01 00:00:00')
-    assert not re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01T00:00')
-    assert not re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01')
+    assert re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01T00:00:00")
+    assert re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01T00:00:00.123")
+    assert not re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01 00:00:00")
+    assert not re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01T00:00")
+    assert not re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01")
+
 
 def test_datetimeZ_rule(rules):
-    assert re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00:00Z')
-    assert re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00:00.000Z')
-    assert not re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00:00')
-    assert not re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17 15:00:00Z')
-    assert not re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00Z')
+    assert re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00:00Z")
+    assert re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00:00.000Z")
+    assert not re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00:00")
+    assert not re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17 15:00:00Z")
+    assert not re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00Z")
+
 
 def test_datetime_or_na_rule(rules):
-    assert re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01T00:00:00')
-    assert re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01T00:00:00.123')
-    assert re.fullmatch(rules['datetime-or-na']['regex-rule'], 'N/A')
-    assert re.fullmatch(rules['datetime-or-na']['regex-rule'], 'NA')
-    assert re.fullmatch(rules['datetime-or-na']['regex-rule'], 'Not Applicable')
-    assert not re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01 00:00:00')
-    assert not re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01T00:00')
-    assert not re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01')
+    assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "2022-01-01T00:00:00")
+    assert re.fullmatch(
+        rules["datetime-or-na"]["regex-rule"], "2022-01-01T00:00:00.123"
+    )
+    assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "N/A")
+    assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "NA")
+    assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "Not Applicable")
+    assert not re.fullmatch(
+        rules["datetime-or-na"]["regex-rule"], "2022-01-01 00:00:00"
+    )
+    assert not re.fullmatch(rules["datetime-or-na"]["regex-rule"], "2022-01-01T00:00")
+    assert not re.fullmatch(rules["datetime-or-na"]["regex-rule"], "2022-01-01")
+
 
 def test_number_rule(rules):
-    assert re.fullmatch(rules['number']['regex-rule'], '123.45')
-    assert re.fullmatch(rules['number']['regex-rule'], '-123.45')
-    assert not re.fullmatch(rules['number']['regex-rule'], '-123.')
-    assert not re.fullmatch(rules['number']['regex-rule'], 'abc')
-    assert not re.fullmatch(rules['number']['regex-rule'], '')
-    assert not re.fullmatch(rules['number']['regex-rule'], '123.45abc')
+    assert re.fullmatch(rules["number"]["regex-rule"], "123.45")
+    assert re.fullmatch(rules["number"]["regex-rule"], "-123.45")
+    assert not re.fullmatch(rules["number"]["regex-rule"], "-123.")
+    assert not re.fullmatch(rules["number"]["regex-rule"], "abc")
+    assert not re.fullmatch(rules["number"]["regex-rule"], "")
+    assert not re.fullmatch(rules["number"]["regex-rule"], "123.45abc")
+
 
 def test_location_rule(rules):
-    assert re.fullmatch(rules['location']['regex-rule'], 'City, Country')
-    assert re.fullmatch(rules['location']['regex-rule'], 'City, Country, State')
-    assert not re.fullmatch(rules['location']['regex-rule'], 'City Country')
-    assert not re.fullmatch(rules['location']['regex-rule'], 'City,')
-    assert not re.fullmatch(rules['location']['regex-rule'], ',Country')
+    assert re.fullmatch(rules["location"]["regex-rule"], "City, Country")
+    assert re.fullmatch(rules["location"]["regex-rule"], "City, Country, State")
+    assert not re.fullmatch(rules["location"]["regex-rule"], "City Country")
+    assert not re.fullmatch(rules["location"]["regex-rule"], "City,")
+    assert not re.fullmatch(rules["location"]["regex-rule"], ",Country")
+
 
 def test_latitude_image_rule(rules):
-    assert re.fullmatch(rules['latitude-image']['regex-rule'], '+12.345678')
-    assert re.fullmatch(rules['latitude-image']['regex-rule'], '-12.345678')
-    assert not re.fullmatch(rules['latitude-image']['regex-rule'], '123.45')
-    assert not re.fullmatch(rules['latitude-image']['regex-rule'], '+123.456789')
-    assert not re.fullmatch(rules['latitude-image']['regex-rule'], '-123.456789')
+    assert re.fullmatch(rules["latitude-image"]["regex-rule"], "+12.345678")
+    assert re.fullmatch(rules["latitude-image"]["regex-rule"], "-12.345678")
+    assert not re.fullmatch(rules["latitude-image"]["regex-rule"], "123.45")
+    assert not re.fullmatch(rules["latitude-image"]["regex-rule"], "+123.456789")
+    assert not re.fullmatch(rules["latitude-image"]["regex-rule"], "-123.456789")
+
 
 def test_longitude_image_rule(rules):
-    assert re.fullmatch(rules['longitude-image']['regex-rule'], '+123.45678')
-    assert re.fullmatch(rules['longitude-image']['regex-rule'], '-123.45678')
-    assert not re.fullmatch(rules['longitude-image']['regex-rule'], '123')
-    assert not re.fullmatch(rules['longitude-image']['regex-rule'], '+1234.56789')
-    assert not re.fullmatch(rules['longitude-image']['regex-rule'], '-1234.56789')
+    assert re.fullmatch(rules["longitude-image"]["regex-rule"], "+123.45678")
+    assert re.fullmatch(rules["longitude-image"]["regex-rule"], "-123.45678")
+    assert not re.fullmatch(rules["longitude-image"]["regex-rule"], "123")
+    assert not re.fullmatch(rules["longitude-image"]["regex-rule"], "+1234.56789")
+    assert not re.fullmatch(rules["longitude-image"]["regex-rule"], "-1234.56789")
+
 
 def test_title_rule(rules):
-    assert re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0.png')
-    assert re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0.jpg')
-    assert not re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0.txt')
-    assert not re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.png')
-    assert not re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0')
+    assert re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0.png")
+    assert re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0.jpg")
+    assert not re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0.txt")
+    assert not re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.png")
+    assert not re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0")
+
 
 def test_title_data_product_rule(rules):
-    assert re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_plot_v1.0.png')
-    assert re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_photo_v1.0.jpg')
-    assert not re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_v1.0.txt')
-    assert not re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_plot_v1.png')
-    assert not re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_plot_v1.0')
+    assert re.fullmatch(
+        rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_plot_v1.0.png"
+    )
+    assert re.fullmatch(
+        rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_photo_v1.0.jpg"
+    )
+    assert not re.fullmatch(
+        rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_v1.0.txt"
+    )
+    assert not re.fullmatch(
+        rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_plot_v1.png"
+    )
+    assert not re.fullmatch(
+        rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_plot_v1.0"
+    )
+
 
 def test_name_format_rule(rules):
-    assert re.fullmatch(rules['name-format']['regex-rule'], 'Last, First M.')
-    assert re.fullmatch(rules['name-format']['regex-rule'], 'Last, First')
-    assert not re.fullmatch(rules['name-format']['regex-rule'], 'First Last')
-    assert not re.fullmatch(rules['name-format']['regex-rule'], 'Last, First, M.')
-    assert not re.fullmatch(rules['name-format']['regex-rule'], 'Last First M.')
+    assert re.fullmatch(rules["name-format"]["regex-rule"], "Last, First M.")
+    assert re.fullmatch(rules["name-format"]["regex-rule"], "Last, First")
+    assert not re.fullmatch(rules["name-format"]["regex-rule"], "First Last")
+    assert not re.fullmatch(rules["name-format"]["regex-rule"], "Last, First, M.")
+    assert not re.fullmatch(rules["name-format"]["regex-rule"], "Last First M.")
+
 
 def test_name_characters_rule(rules):
-    assert re.fullmatch(rules['name-characters']['regex-rule'], 'John_Doe')
-    assert re.fullmatch(rules['name-characters']['regex-rule'], 'John-Doe')
-    assert not re.fullmatch(rules['name-characters']['regex-rule'], 'John Doe!')
-    assert not re.fullmatch(rules['name-characters']['regex-rule'], 'John Doe@')
-    assert not re.fullmatch(rules['name-characters']['regex-rule'], 'John Doe#')
+    assert re.fullmatch(rules["name-characters"]["regex-rule"], "John_Doe")
+    assert re.fullmatch(rules["name-characters"]["regex-rule"], "John-Doe")
+    assert not re.fullmatch(rules["name-characters"]["regex-rule"], "John Doe!")
+    assert not re.fullmatch(rules["name-characters"]["regex-rule"], "John Doe@")
+    assert not re.fullmatch(rules["name-characters"]["regex-rule"], "John Doe#")
+
 
 def test_altitude_image_warning_rule(rules):
-    assert re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123 m')
-    assert re.fullmatch(rules['altitude-image-warning']['regex-rule'], '-123 m')
-    assert not re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123.45 m')
-    assert not re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123')
-    assert not re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123m')
+    assert re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123 m")
+    assert re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "-123 m")
+    assert not re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123.45 m")
+    assert not re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123")
+    assert not re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123m")
+
 
 def test_altitude_image_rule(rules):
-    assert re.fullmatch(rules['altitude-image']['regex-rule'], '123.45 m')
-    assert re.fullmatch(rules['altitude-image']['regex-rule'], '-123.45 m')
-    assert not re.fullmatch(rules['altitude-image']['regex-rule'], '123')
-    assert not re.fullmatch(rules['altitude-image']['regex-rule'], '123.45')
-    assert not re.fullmatch(rules['altitude-image']['regex-rule'], '123.45m')
+    assert re.fullmatch(rules["altitude-image"]["regex-rule"], "123.45 m")
+    assert re.fullmatch(rules["altitude-image"]["regex-rule"], "-123.45 m")
+    assert not re.fullmatch(rules["altitude-image"]["regex-rule"], "123")
+    assert not re.fullmatch(rules["altitude-image"]["regex-rule"], "123.45")
+    assert not re.fullmatch(rules["altitude-image"]["regex-rule"], "123.45m")
+
 
 def test_ncas_email_rule(rules):
-    assert re.fullmatch(rules['ncas-email']['regex-rule'], 'test@ncas.ac.uk')
-    assert re.fullmatch(rules['ncas-email']['regex-rule'], 'test.test@ncas.ac.uk')
-    assert not re.fullmatch(rules['ncas-email']['regex-rule'], 'test@example.com')
-    assert not re.fullmatch(rules['ncas-email']['regex-rule'], 'test@ncas.com')
-    assert not re.fullmatch(rules['ncas-email']['regex-rule'], 'test@ncas.ac')
+    assert re.fullmatch(rules["ncas-email"]["regex-rule"], "test@ncas.ac.uk")
+    assert re.fullmatch(rules["ncas-email"]["regex-rule"], "test.test@ncas.ac.uk")
+    assert not re.fullmatch(rules["ncas-email"]["regex-rule"], "test@example.com")
+    assert not re.fullmatch(rules["ncas-email"]["regex-rule"], "test@ncas.com")
+    assert not re.fullmatch(rules["ncas-email"]["regex-rule"], "test@ncas.ac")
+
 
 def test_map_type_rule():
-    assert r._map_type_rule('number') == Number
-    assert r._map_type_rule('integer') == int
-    assert r._map_type_rule('int') == int
-    assert r._map_type_rule('float') == float
-    assert r._map_type_rule('string') == str
-    assert r._map_type_rule('str') == str
+    assert r._map_type_rule("number") == Number
+    assert r._map_type_rule("integer") == int
+    assert r._map_type_rule("int") == int
+    assert r._map_type_rule("float") == float
+    assert r._map_type_rule("string") == str
+    assert r._map_type_rule("str") == str
     with pytest.raises(KeyError):
-        r._map_type_rule('nonexistent')
+        r._map_type_rule("nonexistent")
+
 
 def test_check():
     rules_instance = r
 
     # Test that the function correctly handles rule-func
-    assert rules_instance.check("rule-func:string_of_length:3", "abc", {}, label="Test") == ([], [])
-    assert rules_instance.check("rule-func:string_of_length:3", "abcd", {}, label="Test") == (["Test 'abcd' must be exactly 3 characters"], [])
+    assert rules_instance.check(
+        "rule-func:string_of_length:3", "abc", {}, label="Test"
+    ) == ([], [])
+    assert rules_instance.check(
+        "rule-func:string_of_length:3", "abcd", {}, label="Test"
+    ) == (["Test 'abcd' must be exactly 3 characters"], [])
 
     # Test that the function correctly handles rule-func-warning
-    assert rules_instance.check("rule-func-warning:string_of_length:3", "abc", {}, label="Test") == ([], [])
-    assert rules_instance.check("rule-func-warning:string_of_length:3", "abcd", {}, label="Test") == ([], ["Test 'abcd' must be exactly 3 characters"])
+    assert rules_instance.check(
+        "rule-func-warning:string_of_length:3", "abc", {}, label="Test"
+    ) == ([], [])
+    assert rules_instance.check(
+        "rule-func-warning:string_of_length:3", "abcd", {}, label="Test"
+    ) == ([], ["Test 'abcd' must be exactly 3 characters"])
 
     # Test that the function correctly handles type-rule
     assert rules_instance.check("type-rule:int", 123, {}, label="Test") == ([], [])
-    assert rules_instance.check("type-rule:int", "abc", {}, label="Test") == (["Test Value 'abc' is not of required type: 'int'."], [])
+    assert rules_instance.check("type-rule:int", "abc", {}, label="Test") == (
+        ["Test Value 'abc' is not of required type: 'int'."],
+        [],
+    )
 
     # Test that the function correctly handles regex-warning
-    assert rules_instance.check("regex-warning:^[a-z]+$", "abc", {}, label="Test") == ([], [])
-    assert rules_instance.check("regex-warning:^[a-z]+$", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."])
+    assert rules_instance.check("regex-warning:^[a-z]+$", "abc", {}, label="Test") == (
+        [],
+        [],
+    )
+    assert rules_instance.check("regex-warning:^[a-z]+$", "ABC", {}, label="Test") == (
+        [],
+        ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."],
+    )
 
     # Test that the function correctly handles regex
     assert rules_instance.check("regex:^[a-z]+$", "abc", {}, label="Test") == ([], [])
-    assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], [])
+    assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (
+        ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."],
+        [],
+    )
 
     # Test that the function correctly handles regex-rule-warning
-    assert rules_instance.check("regex-rule-warning:integer", "123", {}, label="Test") == ([], [])
-    assert rules_instance.check("regex-rule-warning:integer", "123.45", {}, label="Test") == ([], ["Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'."])
+    assert rules_instance.check(
+        "regex-rule-warning:integer", "123", {}, label="Test"
+    ) == ([], [])
+    assert rules_instance.check(
+        "regex-rule-warning:integer", "123.45", {}, label="Test"
+    ) == (
+        [],
+        [
+            "Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'."
+        ],
+    )
 
     # Test that the function correctly handles regex-rule
-    assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ([], [])
-    assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (["Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'."], [])
+    assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == (
+        [],
+        [],
+    )
+    assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (
+        [
+            "Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'."
+        ],
+        [],
+    )
 
     # Test that correct exceptions are raised when the rule or regex is not found
     with pytest.raises(Exception) as e_info:
@@ -512,4 +765,7 @@ def test_check():
 
     with pytest.raises(Exception) as e_info:
         rules_instance.check("regex-rule:nonexistent", "abc", {}, label="Test")
-    assert str(e_info.value) == "Regex rule not found with rule ID: regex-rule:nonexistent."
+    assert (
+        str(e_info.value)
+        == "Regex rule not found with rule ID: regex-rule:nonexistent."
+    )

From 9621b929ab3706b37df7ca9cc992d40fe687a542 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 1 Oct 2024 11:53:38 +0100
Subject: [PATCH 5/5] Use the original date string in error messages
 (the value is rewritten before parsing on Python 3.10 and earlier)

---
 checksit/rules/rule_funcs.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py
index cf714cd..d2dc95d 100644
--- a/checksit/rules/rule_funcs.py
+++ b/checksit/rules/rule_funcs.py
@@ -394,6 +394,7 @@ def check_utc_date_iso_format(value, context, extras=None, label=""):
     """
     errors = []
 
+    original_value = value
     if sys.version_info < (3,11): # python datetime changed its recognition of ISO format from 3.11 onward
         if value.endswith("Z"):
             value = value.replace("Z", "+00:00")
@@ -402,9 +403,9 @@ def check_utc_date_iso_format(value, context, extras=None, label=""):
     try:
         dt = datetime.fromisoformat(value)
         if (dt.utcoffset() != None) and (dt.utcoffset().total_seconds() != 0):
-            errors.append(f"{label} Date string '{value}' not in UTC.")
+            errors.append(f"{label} Date string '{original_value}' not in UTC.")
     except ValueError:
-        errors.append(f"{label} Date string '{value}' not in ISO 8601 format.")
+        errors.append(f"{label} Date string '{original_value}' not in ISO 8601 format.")
     except:
         raise