From d4861ebe2e47ea0b90909096f581f8b37d94771a Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 20 Aug 2024 17:34:05 +0100 Subject: [PATCH 1/5] Changes due to clarification of standard --- checksit/generic.py | 53 ++++++++++++++----- checksit/rules/rule_funcs.py | 45 ++++++++++++++++ .../ncas-radar-1.0.0/coordinate-variables.yml | 2 +- .../groups/ncas-radar-1.0.0/global-attrs.yml | 8 +-- .../sensor-pointing-variables.yml | 6 ++- 5 files changed, 95 insertions(+), 19 deletions(-) diff --git a/checksit/generic.py b/checksit/generic.py index 3742769..7897ff9 100644 --- a/checksit/generic.py +++ b/checksit/generic.py @@ -319,10 +319,22 @@ def check_var(dct, variable, defined_attrs, rules_attrs=None, skip_spellcheck=Fa attr_key = attr.split(":")[0] attr_rule = ":".join(attr.split(":")[1:]) if attr_key not in dct["variables"][variable]: - errors.append( - f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. " - f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}" - ) + if not ( + attr_key == "standard_name" and attr_rule.split(":")[1] == "allow-proposed" + ): + errors.append( + f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. " + f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}" + ) + else: + rule_errors, rule_warnings = rules.check( + attr_rule, + dct["variables"][variable].get(attr_key), + context=dct["variables"][variable].get("proposed_standard_name"), + label=f"[variables:******:{variable}]***", + ) + errors.extend(rule_errors) + warnings.extend(rule_warnings) elif is_undefined(dct["variables"][variable].get(attr_key)): errors.append( f"[variable:**************:{variable}]: No value defined for attribute '{attr_key}'." @@ -386,14 +398,31 @@ def check_var(dct, variable, defined_attrs, rules_attrs=None, skip_spellcheck=Fa attr_key = attr.split(":")[0] attr_rule = ":".join(attr.split(":")[1:]) if attr_key not in dct["variables"][variable]: - errors.append( - f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. " - f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}" - ) - elif is_undefined(dct["variables"][variable].get(attr_key)): - errors.append( - f"[variable:**************:{variable}]: No value defined for attribute '{attr_key}'." - ) + if not ( + attr_key == "standard_name" and attr_rule.split(":")[1] == "allow-proposed" + ): + errors.append( + f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. " + f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}" + ) + else: + rule_errors, rule_warnings = rules.check( + attr_rule, + dct["variables"][variable].get(attr_key), + context=dct["variables"][variable].get("proposed_standard_name"), + label=f"[variables:******:{variable}]***", + ) + errors.extend(rule_errors) + warnings.extend(rule_warnings) + #if attr_key not in dct["variables"][variable]: + # errors.append( + # f"[variable:**************:{variable}]: Attribute '{attr_key}' does not exist. " + # f"{search_close_match(attr_key, dct['variables'][variable].keys()) if not skip_spellcheck else ''}" + # ) + #elif is_undefined(dct["variables"][variable].get(attr_key)): + # errors.append( + # f"[variable:**************:{variable}]: No value defined for attribute '{attr_key}'." + # ) elif attr_rule.startswith("rule-func:same-type-as"): var_checking_against = attr_rule.split(":")[-1] rule_errors, rule_warnings = rules.check( diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index 118fc67..cf714cd 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -4,6 +4,7 @@ import requests from urllib.request import urlopen import numpy as np +import sys from . import processors from ..config import get_config @@ -384,3 +385,47 @@ def check_qc_flags(value, context, extras=None, label=""): ) return errors + + +def check_utc_date_iso_format(value, context, extras=None, label=""): + """ + Check date given is in ISO 8601 format and in UTC + value - date string + """ + errors = [] + + if sys.version_info < (3,11): # python datetime changed its recognition of ISO format from 3.11 onward + if value.endswith("Z"): + value = value.replace("Z", "+00:00") + elif re.fullmatch(r"(\+|-)\d{4}", value[-5:]): + value = f"{value[:-2]}:{value[-2:]}" + try: + dt = datetime.fromisoformat(value) + if (dt.utcoffset() != None) and (dt.utcoffset().total_seconds() != 0): + errors.append(f"{label} Date string '{value}' not in UTC.") + except ValueError: + errors.append(f"{label} Date string '{value}' not in ISO 8601 format.") + except: + raise + + return errors + + +def allow_proposed(value, context, extras=None, label=""): + """ + Check for proposed_standard_name if standard_name not given + value - value of the standard_name attribute + context - value of the proposed_standard_name attribute + extras - value to match + """ + errors = [] + + if extras != None and isinstance(extras, list): + extras = extras[0] + + if value != extras and context != extras: + errors.append(f"{label} does not contain standard_name or proposed_standard_name with value '{extras}'") + + return errors + + diff --git a/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml b/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml index 08fab02..f2e4166 100644 --- a/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml +++ b/specs/groups/ncas-radar-1.0.0/coordinate-variables.yml @@ -17,10 +17,10 @@ var-requires1: - range defined_attrs: - type:float - - standard_name:projection_range_coordinate - axis:radial_range_coordinate rules_attrs: - dimension: rule-func:match-one-of:range|sweep, range - units: rule-func:match-one-of:metres|meters - long_name: rule-func:string-of-length:5+ - spacing_is_constant: rule-func:match-one-of:true|false + - standard_name: rule-func:allow-proposed:projection_range_coordinate diff --git a/specs/groups/ncas-radar-1.0.0/global-attrs.yml b/specs/groups/ncas-radar-1.0.0/global-attrs.yml index 5c15635..f1935ad 100644 --- a/specs/groups/ncas-radar-1.0.0/global-attrs.yml +++ b/specs/groups/ncas-radar-1.0.0/global-attrs.yml @@ -31,14 +31,14 @@ required-gloabl-attrs: creator_email: regex-rule:valid-email creator_url: regex-rule:valid-url||rule-func-warning:validate-orcid-ID processing_level: rule-func:match-one-of:1|2|3 - last_revised_date: regex-rule:datetime + last_revised_date: rule-func:check-utc-date-iso-format project_principal_investigator_email: regex-rule:valid-email project_principal_investigator_url: regex-rule:valid-url||rule-func-warning:validate-orcid-ID platform: rule-func:ceda-platform||rule-func-warning:ncas-platform deployment_mode: rule-func:match-one-of:land|sea|air - time_coverage_start: regex-rule:datetimeZ - time_coverage_end: regex-rule:datetimeZ + time_coverage_start: rule-func:check-utc-date-iso-format + time_coverage_end: rule-func:check-utc-date-iso-format regex_attrs: - Conventions: ^(NCAS-Radar-1\.0|CfRadial-1\.4) (NCAS-Radar-1\.0|CfRadial-1\.4) instrument_parameters radar_parameters radar_calibration$ + Conventions: ^(NCAS-Radar-1\.0 CfRadial-1\.4|CfRadial-1\.4 NCAS-Radar-1\.0)( instrument_parameters| radar_parameters| lidar_parameters| radar_calibration| lidar_calibration| platform_velocity| geometry_correction)*$ product_version: ^v\d+\.\d+\.\d+$ diff --git a/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml b/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml index 3985161..fe0e5b3 100644 --- a/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml +++ b/specs/groups/ncas-radar-1.0.0/sensor-pointing-variables.yml @@ -7,9 +7,10 @@ var-requires0: - type:float - dimension:time - units:degrees - - standard_name:ray_azimuth_angle - long_name:azimuth_angle_from_true_north - axis:radial_azimuth_coordinate + rules_attrs: + - standard_name: rule-func:allow-proposed:ray_azimuth_angle var-requires1: func: checksit.generic.check_var params: @@ -19,6 +20,7 @@ var-requires1: - type:float - dimension:time - units:degrees - - standard_name:ray_elevation_angle - long_name:elevation_angle_from_horizontal_plane - axis:radial_elevation_coordinate + rules_attrs: + - standard_name: rule-func:allow-proposed:ray_elevation_angle From 98c0dcacf33178e3a292f52d3c7544a7315e6590 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 1 Oct 2024 10:34:38 +0100 Subject: [PATCH 2/5] Change the non-existent URL in tests --- tests/test_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index f24c117..2485593 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -157,7 +157,7 @@ def test_url_checker(): assert crf.url_checker("https://www.example.com", {}, label="Test") == [] # Test that the function correctly handles an unreachable URL - assert crf.url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"] + assert crf.url_checker("fake://www.nonexistenturl.com", {}, label="Test") == ["Test 'fake://www.nonexistenturl.com' is not a reachable url"] # Test that the function correctly handles an existing but unreachable URL assert crf.url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"] From fcde548a51f66ca6d65a4acc2398f79660c5f8c3 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 1 Oct 2024 11:11:12 +0100 Subject: [PATCH 3/5] Add functions to test utc_date_iso_format and allow_proposed rules --- tests/test_rules.py | 62 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/test_rules.py b/tests/test_rules.py index 2485593..3d8d892 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -223,6 +223,68 @@ def test_ncas_platform(): # Test function returns error for example platform assert crf.ncas_platform("example", {}, label='Test') == ["Test 'example' is not a valid NCAS platform"] +def test_utc_date_iso_format(): + # Test function returns no errors for an ISO formatted date in UTC, with "Z", "+0000", and "+00:00" time zone identifiers + assert crf.check_utc_date_iso_format("2024-11-17T01:23:45Z", {}, label="Test") == [] + assert ( + crf.check_utc_date_iso_format("2024-11-17T01:23:45+0000", {}, label="Test") + == [] + ) + assert ( + crf.check_utc_date_iso_format("2024-11-17T01:23:45 +0000", {}, label="Test") + == [] + ) + assert ( + crf.check_utc_date_iso_format("2024-11-17T01:23:45+00:00", {}, label="Test") + == [] + ) + assert ( + crf.check_utc_date_iso_format("2024-11-17 01:23:45+00:00", {}, label="Test") + == [] + ) + assert ( + crf.check_utc_date_iso_format( + "2024-11-17T01:23:45.678901+00:00", {}, label="Test" + ) + == [] + ) + # Test function returns error for ISO formatted date NOT in UTC + assert crf.check_utc_date_iso_format( + "2024-11-17T01:23:45+0100", {}, label="Test" + ) == ["Test Date string '2024-11-17T01:23:45+0100' not in UTC."] + assert crf.check_utc_date_iso_format( + "2024-11-17T01:23:45-01:00", {}, label="Test" + ) == ["Test Date string '2024-11-17T01:23:45-01:00' not in UTC."] + # Test function returns error for non ISO formatted date in UTC + assert crf.check_utc_date_iso_format("2024/11/17T01:23:45Z", {}, label="Test") == [ + "Test Date string '2024/11/17T01:23:45Z' not in ISO 8601 format." + ] + assert crf.check_utc_date_iso_format( + "20241117T01-23-45+0000", {}, label="Test" + ) == ["Test Date string '20241117T01-23-45+0000' not in ISO 8601 format."] + # Test function returns error for something that is very much not a date + assert crf.check_utc_date_iso_format( + "11th November 2024 at 23 minutes and 45 seconds past 1 in the morning", + {}, + label="Test", + ) == [ + "Test Date string '11th November 2024 at 23 minutes and 45 seconds past 1 in the morning' not in ISO 8601 format." + ] + + +def test_allow_proposed(): + # Test function returns no errors when value of "extra" matches value of "value" + assert crf.allow_proposed("name1", None, extras="name1", label="Test") == [] + assert crf.allow_proposed("name1", None, extras=["name1"], label="Test") == [] + assert crf.allow_proposed("name1", "name2", extras="name1", label="Test") == [] + # Test function returns no errors when value of "extra" matches value of "context" + assert crf.allow_proposed(None, "name2", extras="name2", label="Test") == [] + # Test function returns errors when there is no match + assert crf.allow_proposed("name1", "name2", extras="name3", label="Test") == [ + "Test does not contain standard_name or proposed_standard_name with value 'name3'" + ] + + # rules.py def _test_type(_type, value): return r.check(f"type-rule:{_type}", value) From 7ee77417f333843dbe68ebcd3ae03f8b1b70a929 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 1 Oct 2024 11:11:41 +0100 Subject: [PATCH 4/5] Format file with black --- tests/test_rules.py | 660 ++++++++++++++++++++++++++++++-------------- 1 file changed, 458 insertions(+), 202 deletions(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index 3d8d892..e548183 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -6,6 +6,7 @@ from checksit.rules import rules as r import checksit.rules.rule_funcs as crf + # rule_funcs.py def test_match_file_name(): file_path = "happy_netcdf" @@ -20,136 +21,261 @@ def test_match_file_name(): def test_string_of_length(): # Test that the function correctly handles strings of the minimum length - assert crf.string_of_length('abc', {}, ['3'], 'Test') == [] - assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == [] + assert crf.string_of_length("abc", {}, ["3"], "Test") == [] + assert crf.string_of_length("abcd", {}, ["3+"], "Test") == [] # Test that the function correctly handles strings shorter than the minimum length - assert crf.string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"] - assert crf.string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"] + assert crf.string_of_length("ab", {}, ["3"], "Test") == [ + "Test 'ab' must be exactly 3 characters" + ] + assert crf.string_of_length("ab", {}, ["3+"], "Test") == [ + "Test 'ab' must be at least 3 characters" + ] # Test that the function correctly handles strings longer than the minimum length - assert crf.string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"] - assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == [] + assert crf.string_of_length("abcd", {}, ["3"], "Test") == [ + "Test 'abcd' must be exactly 3 characters" + ] + assert crf.string_of_length("abcd", {}, ["3+"], "Test") == [] # Test that the function correctly handles empty strings - assert crf.string_of_length('', {}, ['0'], 'Test') == [] - assert crf.string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"] - assert crf.string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"] + assert crf.string_of_length("", {}, ["0"], "Test") == [] + assert crf.string_of_length("", {}, ["1"], "Test") == [ + "Test '' must be exactly 1 characters" + ] + assert crf.string_of_length("", {}, ["1+"], "Test") == [ + "Test '' must be at least 1 characters" + ] def test_match_one_of(): # Test that the function correctly handles valid inputs - assert crf.match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == [] + assert crf.match_one_of("apple", {}, ["apple|banana|orange"], "Test") == [] # Test that the function correctly handles invalid inputs - assert crf.match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_of("kiwi", {}, ["apple|banana|orange"], "Test") == [ + "Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'" + ] # Test that the function correctly handles empty strings - assert crf.match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_of("", {}, ["apple|banana|orange"], "Test") == [ + "Test '' must be one of: '['apple', 'banana', 'orange']'" + ] def test_match_one_or_more_of(): # Test that the function correctly handles valid inputs - assert crf.match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == [] - assert crf.match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == [] + assert ( + crf.match_one_or_more_of("apple,banana", {}, ["apple|banana|orange"], "Test") + == [] + ) + assert crf.match_one_or_more_of("apple", {}, ["apple|banana|orange"], "Test") == [] # Test that the function correctly handles invalid inputs - assert crf.match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] - assert crf.match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_or_more_of( + "apple,kiwi", {}, ["apple|banana|orange"], "Test" + ) == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_or_more_of("kiwi", {}, ["apple|banana|orange"], "Test") == [ + "Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'" + ] # Test that the function correctly handles empty strings - assert crf.match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_or_more_of("", {}, ["apple|banana|orange"], "Test") == [ + "Test '' must be one or more of: '['apple', 'banana', 'orange']'" + ] def test_validate_image_date_time(): # Test that the function correctly handles valid date-time strings - assert crf.validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == [] - assert crf.validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == [] + assert crf.validate_image_date_time("2022:01:01 12:00:00", {}, label="Test") == [] + assert ( + crf.validate_image_date_time("2022:01:01 12:00:00.000000", {}, label="Test") + == [] + ) # Test that the function correctly handles invalid date-time strings - assert crf.validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] - assert crf.validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] - assert crf.validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] - assert crf.validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time("2022-01-01 12:00:00", {}, label="Test") == [ + "Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s" + ] + assert crf.validate_image_date_time("2022:01:01 12:00", {}, label="Test") == [ + "Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s" + ] + assert crf.validate_image_date_time("2022:01:01", {}, label="Test") == [ + "Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s" + ] + assert crf.validate_image_date_time("2022:01:01 12:00:00.00", {}, label="Test") == [ + "Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s" + ] # Test that the function correctly handles empty strings - assert crf.validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time("", {}, label="Test") == [ + "Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s" + ] def test_validate_orcid_ID(): # Test that the function correctly handles valid ORCID IDs - assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == [] - assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == [] - assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == [] + assert ( + crf.validate_orcid_ID("https://orcid.org/0000-0002-1825-0097", {}, label="Test") + == [] + ) + assert ( + crf.validate_orcid_ID("https://orcid.org/1234-5678-9012-3456", {}, label="Test") + == [] + ) + assert ( + crf.validate_orcid_ID("https://orcid.org/1234-5678-9012-345X", {}, label="Test") + == [] + ) # Test that the function correctly handles ORCID IDs with incorrect lengths - assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID( + "https://orcid.org/0000-0002-1825-009", {}, label="Test" + ) == [ + "Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] + assert crf.validate_orcid_ID( + "https://orcid.org/1234-5678-9012-34567", {}, label="Test" + ) == [ + "Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] # Test that the function correctly handles ORCID IDs with incorrect formats - assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID( + "https://orcid.org/0000-0002-1825-009Z", {}, label="Test" + ) == [ + "Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] + assert crf.validate_orcid_ID( + "https://orcid.org/1234-5678-9012-34X5", {}, label="Test" + ) == [ + "Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] + assert crf.validate_orcid_ID( + "https://orcid.org/1234-5678-9012-3456-", {}, label="Test" + ) == [ + "Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] + assert crf.validate_orcid_ID( + "https://orcid.org/1234-5678-9012-3456X", {}, label="Test" + ) == [ + "Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] # Test that the function correctly handles empty strings - assert crf.validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID("", {}, label="Test") == [ + "Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX" + ] def test_list_of_names(): # Test that the function correctly handles valid names - assert crf.list_of_names('Doe, John', {}, label='Test') == [] - assert crf.list_of_names('Doe, John J.', {}, label='Test') == [] - assert crf.list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == [] + assert crf.list_of_names("Doe, John", {}, label="Test") == [] + assert crf.list_of_names("Doe, John J.", {}, label="Test") == [] + assert crf.list_of_names(["Doe, John", "Smith, Jane"], {}, label="Test") == [] # Test that the function correctly handles names with incorrect formats - assert crf.list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format , or , where appropriate"] - assert crf.list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format , or , where appropriate"] - assert crf.list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format , or , where appropriate"] + assert crf.list_of_names("John Doe", {}, label="Test") == [ + "Test 'John Doe' should be of the format , or , where appropriate" + ] + assert crf.list_of_names("Doe John", {}, label="Test") == [ + "Test 'Doe John' should be of the format , or , where appropriate" + ] + assert crf.list_of_names(["Doe, John", "Jane Smith"], {}, label="Test") == [ + "Test '['Doe, John', 'Jane Smith']' should be of the format , or , where appropriate" + ] # Test that the function correctly handles names with invalid characters - assert crf.list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"] - assert crf.list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"] - assert crf.list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert crf.list_of_names("Doe, J0hn", {}, label="Test") == [ + "Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate" + ] + assert crf.list_of_names("Doe, John!", {}, label="Test") == [ + "Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate" + ] + assert crf.list_of_names(["Doe, John", "Smith, J@ne"], {}, label="Test") == [ + "Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate" + ] # Test that the function correctly handles empty strings - assert crf.list_of_names('', {}, label='Test') == ["Test '' should be of the format , or , where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"] - assert crf.list_of_names([], {}, label='Test') == [] + assert crf.list_of_names("", {}, label="Test") == [ + "Test '' should be of the format , or , where appropriate", + "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate", + ] + assert crf.list_of_names([], {}, label="Test") == [] def test_headline(): # Test that the function correctly handles valid headlines - assert crf.headline('This is a valid headline.', {}, label='Test') == [] - assert crf.headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == [] - assert crf.headline('This headline is exactly 10 characters.', {}, label='Test') == [] + assert crf.headline("This is a valid headline.", {}, label="Test") == [] + assert ( + crf.headline( + "This headline is exactly 150 characters long " + "a" * 105, + {}, + label="Test", + ) + == [] + ) + assert ( + crf.headline("This headline is exactly 10 characters.", {}, label="Test") == [] + ) # Test that the function correctly handles headlines longer than 150 characters - assert crf.headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"] + assert crf.headline( + "This headline is longer than 150 characters." + "a" * 120, {}, label="Test" + ) == [ + "Test 'This headline is longer than 150 characters." + + "a" * 120 + + "' should contain no more than one sentence" + ] # Test that the function correctly handles headlines with more than one sentence - assert crf.headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"] + assert crf.headline( + "This is a headline. It has two sentences.", {}, label="Test" + ) == [ + "Test 'This is a headline. It has two sentences.' should contain no more than one sentence" + ] # Test that the function correctly handles headlines that do not start with a capital letter - assert crf.headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"] + assert crf.headline( + "this headline does not start with a capital letter.", {}, label="Test" + ) == [ + "Test 'this headline does not start with a capital letter.' should start with a capital letter" + ] # Test that the function correctly handles headlines shorter than 10 characters - assert crf.headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"] + assert crf.headline("Too short", {}, label="Test") == [ + "Test 'Too short' should be at least 10 characters" + ] # Test that the function correctly handles empty strings - assert crf.headline('', {}, label='Test') == ["Test '' should not be empty"] + assert crf.headline("", {}, label="Test") == ["Test '' should not be empty"] def test_title_check(): # Test that the function correctly handles titles that match the filename - assert crf.title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == [] - assert crf.title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == [] + assert ( + crf.title_check("happy_netcdf", "/path/to/file/happy_netcdf", label="Test") + == [] + ) + assert ( + crf.title_check( + "happy_NetCDF.nc", "/path/to/file/happy_NetCDF.nc", label="Test" + ) + == [] + ) # Test that the function correctly handles titles that do not match the filename - assert crf.title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"] - assert crf.title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"] + assert crf.title_check( + "sad_netcdf", "/path/to/file/happy_netcdf", label="Test" + ) == ["Test 'sad_netcdf' must match the name of the file"] + assert crf.title_check( + "happy_NetCDF.nc", "/path/to/file/sad_NetCDF.nc", label="Test" + ) == ["Test 'happy_NetCDF.nc' must match the name of the file"] # Test that the function correctly handles empty titles - assert crf.title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"] + assert crf.title_check("", "/path/to/file/happy_netcdf", label="Test") == [ + "Test '' must match the name of the file" + ] def test_url_checker(): @@ -157,10 +283,14 @@ def test_url_checker(): assert crf.url_checker("https://www.example.com", {}, label="Test") == [] # Test that the function correctly handles an unreachable URL - assert crf.url_checker("fake://www.nonexistenturl.com", {}, label="Test") == ["Test 'fake://www.nonexistenturl.com' is not a reachable url"] + assert crf.url_checker("fake://www.nonexistenturl.com", {}, label="Test") == [ + "Test 'fake://www.nonexistenturl.com' is not a reachable url" + ] # Test that the function correctly handles an existing but unreachable URL - assert crf.url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"] + assert crf.url_checker( + "https://www.example.com/nonexistentpage", {}, label="Test" + ) == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"] # Test that the function correctly handles an empty URL assert crf.url_checker("", {}, label="Test") == ["Test '' is not a reachable url"] @@ -168,60 +298,107 @@ def test_url_checker(): def test_relation_url_checker(): # Test that the function correctly handles valid inputs - assert crf.relation_url_checker('relation https://example.com', {}, label='Test') == [] + assert ( + crf.relation_url_checker("relation https://example.com", {}, label="Test") == [] + ) # Test that the function correctly handles inputs without a space - assert crf.relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"] + assert crf.relation_url_checker( + "relationhttps://example.com", {}, label="Test" + ) == ["Test 'relationhttps://example.com' should contain a space before the url"] # Test that the function correctly handles inputs with an invalid URL - assert crf.relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"] + assert crf.relation_url_checker("relation https://", {}, label="Test") == [ + "Test 'https://' is not a reachable url" + ] # Test that the function correctly handles empty strings - assert crf.relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"] + assert crf.relation_url_checker("", {}, label="Test") == [ + "Test '' should contain a space before the url" + ] def test_latitude(): # Test that the function correctly handles valid latitudes - assert crf.latitude('45.1234', {}, label='Test') == [] - assert crf.latitude('-90.0000', {}, label='Test') == [] - assert crf.latitude('90.0000', {}, label='Test') == [] + assert crf.latitude("45.1234", {}, label="Test") == [] + assert crf.latitude("-90.0000", {}, label="Test") == [] + assert crf.latitude("90.0000", {}, label="Test") == [] # Test that the function correctly handles invalid latitudes - assert crf.latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "] - assert crf.latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "] - assert crf.latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "] + assert crf.latitude("90.0001", {}, label="Test") == [ + "Test '90.0001' must be within -90 and +90 " + ] + assert crf.latitude("-90.0001", {}, label="Test") == [ + "Test '-90.0001' must be within -90 and +90 " + ] + assert crf.latitude("100.0000", {}, label="Test") == [ + "Test '100.0000' must be within -90 and +90 " + ] def test_longitude(): # Test that the function correctly handles valid longitudes - assert crf.longitude('45.1234', {}, label='Test') == [] - assert crf.longitude('-180.0000', {}, label='Test') == [] - assert crf.longitude('180.0000', {}, label='Test') == [] + assert crf.longitude("45.1234", {}, label="Test") == [] + assert crf.longitude("-180.0000", {}, label="Test") == [] + assert crf.longitude("180.0000", {}, label="Test") == [] # Test that the function correctly handles invalid longitudes - assert crf.longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "] - assert crf.longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "] - assert crf.longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "] + assert crf.longitude("180.0001", {}, label="Test") == [ + "Test '180.0001' must be within -180 and +180 " + ] + assert crf.longitude("-180.0001", {}, label="Test") == [ + "Test '-180.0001' must be within -180 and +180 " + ] + assert crf.longitude("200.0000", {}, label="Test") == [ + "Test '200.0000' must be within -180 and +180 " + ] def test_ceda_platform(): # Test function returns no errors for all NCAS platforms - for plat in ["bt-tower-t35", "cao", "cao-sparsholt", "cdao", "cdao-frongoch", "cvao", "faam", "iao", "wao"]: + for plat in [ + "bt-tower-t35", + "cao", + "cao-sparsholt", + "cdao", + "cdao-frongoch", + "cvao", + "faam", + "iao", + "wao", + ]: assert crf.ceda_platform(plat, {}) == [] # Test function returns no errors for a non-NCAS platform assert crf.ceda_platform("netheravon", {}) == [] # Test function returns error for example platform - assert crf.ceda_platform("example", {}, label='Test') == ["Test 'example' is not a valid platform in the CEDA catalogue"] + assert crf.ceda_platform("example", {}, label="Test") == [ + "Test 'example' is not a valid platform in the CEDA catalogue" + ] def test_ncas_platform(): # Test function returns no errors for all NCAS platforms - for plat in ["bt-tower-t35", "cao", "cao-sparsholt", "cdao", "cdao-frongoch", "cvao", "faam", "iao", "wao"]: + for plat in [ + "bt-tower-t35", + "cao", + "cao-sparsholt", + "cdao", + "cdao-frongoch", + "cvao", + "faam", + "iao", + "wao", + ]: assert crf.ncas_platform(plat, {}) == [] # Test function returns error for a non-NCAS platform - assert crf.ncas_platform("netheravon", {}, label='Test') == ["Test 'netheravon' is not a valid NCAS platform"] + assert crf.ncas_platform("netheravon", {}, label="Test") == [ + "Test 'netheravon' is not a valid NCAS platform" + ] # Test function returns error for example platform - assert crf.ncas_platform("example", {}, label='Test') == ["Test 'example' is not a valid NCAS platform"] + assert crf.ncas_platform("example", {}, label="Test") == [ + "Test 'example' is not a valid NCAS platform" + ] + def test_utc_date_iso_format(): # Test function returns no errors for an ISO formatted date in UTC, with "Z", "+0000", and "+00:00" time zone identifiers @@ -289,13 +466,14 @@ def test_allow_proposed(): def _test_type(_type, value): return r.check(f"type-rule:{_type}", value) + def test_type_rules(): tt = _test_type _type = "number" for value in 3.4, -4: assert tt(_type, value) == ([], []) - print('sarah test', tt(_type, value)) + print("sarah test", tt(_type, value)) for value in "3", "3.4", ["hi"]: assert tt(_type, value) != ([], []) @@ -321,189 +499,264 @@ def test_type_rules(): for value in 3, 4.5, ["hi"]: assert tt(_type, value) != ([], []) + # static regex rule tests @pytest.fixture def rules(): return r.static_regex_rules + def test_integer_rule(rules): - assert re.fullmatch(rules['integer']['regex-rule'], '123') - assert re.fullmatch(rules['integer']['regex-rule'], '-123') - assert not re.fullmatch(rules['integer']['regex-rule'], '123.45') - assert not re.fullmatch(rules['integer']['regex-rule'], 'abc') - assert not re.fullmatch(rules['integer']['regex-rule'], '') + assert re.fullmatch(rules["integer"]["regex-rule"], "123") + assert re.fullmatch(rules["integer"]["regex-rule"], "-123") + assert not re.fullmatch(rules["integer"]["regex-rule"], "123.45") + assert not re.fullmatch(rules["integer"]["regex-rule"], "abc") + assert not re.fullmatch(rules["integer"]["regex-rule"], "") + def test_valid_email_rule(rules): - assert re.fullmatch(rules['valid-email']['regex-rule'], 'test@example.com') - assert re.fullmatch(rules['valid-email']['regex-rule'], 'test.test@example.com') - assert not re.fullmatch(rules['valid-email']['regex-rule'], 'test@example') - assert not re.fullmatch(rules['valid-email']['regex-rule'], 'test@.com') - assert not re.fullmatch(rules['valid-email']['regex-rule'], 'test@com') + assert re.fullmatch(rules["valid-email"]["regex-rule"], "test@example.com") + assert re.fullmatch(rules["valid-email"]["regex-rule"], "test.test@example.com") + assert not re.fullmatch(rules["valid-email"]["regex-rule"], "test@example") + assert not re.fullmatch(rules["valid-email"]["regex-rule"], "test@.com") + assert not re.fullmatch(rules["valid-email"]["regex-rule"], "test@com") + def test_valid_url_rule(rules): - assert re.fullmatch(rules['valid-url']['regex-rule'], 'https://example.com') - assert re.fullmatch(rules['valid-url']['regex-rule'], 'http://example.com') - assert not re.fullmatch(rules['valid-url']['regex-rule'], 'htp://example.com') - assert not re.fullmatch(rules['valid-url']['regex-rule'], 'https:/example.com') - assert not re.fullmatch(rules['valid-url']['regex-rule'], 'https://example') + assert re.fullmatch(rules["valid-url"]["regex-rule"], "https://example.com") + assert re.fullmatch(rules["valid-url"]["regex-rule"], "http://example.com") + assert not re.fullmatch(rules["valid-url"]["regex-rule"], "htp://example.com") + assert not re.fullmatch(rules["valid-url"]["regex-rule"], "https:/example.com") + assert not re.fullmatch(rules["valid-url"]["regex-rule"], "https://example") + def test_valid_url_or_na_rule(rules): - assert re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'https://example.com') - assert re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'http://example.com') - assert re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'N/A') - assert not re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'htp://example.com') - assert not re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'https:/example.com') - assert not re.fullmatch(rules['valid-url-or-na']['regex-rule'], 'nan') + assert re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "https://example.com") + assert re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "http://example.com") + assert re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "N/A") + assert not re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "htp://example.com") + assert not re.fullmatch( + rules["valid-url-or-na"]["regex-rule"], "https:/example.com" + ) + assert not re.fullmatch(rules["valid-url-or-na"]["regex-rule"], "nan") + def test_match_vN_M_rule(rules): - assert re.fullmatch(rules['match:vN.M']['regex-rule'], 'v1.0') - assert re.fullmatch(rules['match:vN.M']['regex-rule'], 'v2.1') - assert not re.fullmatch(rules['match:vN.M']['regex-rule'], 'v10') - assert not re.fullmatch(rules['match:vN.M']['regex-rule'], 'v1.01') - assert not re.fullmatch(rules['match:vN.M']['regex-rule'], 'v.1.0') + assert re.fullmatch(rules["match:vN.M"]["regex-rule"], "v1.0") + assert re.fullmatch(rules["match:vN.M"]["regex-rule"], "v2.1") + assert not re.fullmatch(rules["match:vN.M"]["regex-rule"], "v10") + assert not re.fullmatch(rules["match:vN.M"]["regex-rule"], "v1.01") + assert not re.fullmatch(rules["match:vN.M"]["regex-rule"], "v.1.0") + def test_datetime_rule(rules): - assert re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01T00:00:00') - assert re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01T00:00:00.123') - assert not re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01 00:00:00') - assert not re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01T00:00') - assert not re.fullmatch(rules['datetime']['regex-rule'], '2022-01-01') + assert re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01T00:00:00") + assert re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01T00:00:00.123") + assert not re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01 00:00:00") + assert not re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01T00:00") + assert not re.fullmatch(rules["datetime"]["regex-rule"], "2022-01-01") + def test_datetimeZ_rule(rules): - assert re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00:00Z') - assert re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00:00.000Z') - assert not re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00:00') - assert not re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17 15:00:00Z') - assert not re.fullmatch(rules['datetimeZ']['regex-rule'], '2023-11-17T15:00Z') + assert re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00:00Z") + assert re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00:00.000Z") + assert not re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00:00") + assert not re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17 15:00:00Z") + assert not re.fullmatch(rules["datetimeZ"]["regex-rule"], "2023-11-17T15:00Z") + def test_datetime_or_na_rule(rules): - assert re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01T00:00:00') - assert re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01T00:00:00.123') - assert re.fullmatch(rules['datetime-or-na']['regex-rule'], 'N/A') - assert re.fullmatch(rules['datetime-or-na']['regex-rule'], 'NA') - assert re.fullmatch(rules['datetime-or-na']['regex-rule'], 'Not Applicable') - assert not re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01 00:00:00') - assert not re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01T00:00') - assert not re.fullmatch(rules['datetime-or-na']['regex-rule'], '2022-01-01') + assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "2022-01-01T00:00:00") + assert re.fullmatch( + rules["datetime-or-na"]["regex-rule"], "2022-01-01T00:00:00.123" + ) + assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "N/A") + assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "NA") + assert re.fullmatch(rules["datetime-or-na"]["regex-rule"], "Not Applicable") + assert not re.fullmatch( + rules["datetime-or-na"]["regex-rule"], "2022-01-01 00:00:00" + ) + assert not re.fullmatch(rules["datetime-or-na"]["regex-rule"], "2022-01-01T00:00") + assert not re.fullmatch(rules["datetime-or-na"]["regex-rule"], "2022-01-01") + def test_number_rule(rules): - assert re.fullmatch(rules['number']['regex-rule'], '123.45') - assert re.fullmatch(rules['number']['regex-rule'], '-123.45') - assert not re.fullmatch(rules['number']['regex-rule'], '-123.') - assert not re.fullmatch(rules['number']['regex-rule'], 'abc') - assert not re.fullmatch(rules['number']['regex-rule'], '') - assert not re.fullmatch(rules['number']['regex-rule'], '123.45abc') + assert re.fullmatch(rules["number"]["regex-rule"], "123.45") + assert re.fullmatch(rules["number"]["regex-rule"], "-123.45") + assert not re.fullmatch(rules["number"]["regex-rule"], "-123.") + assert not re.fullmatch(rules["number"]["regex-rule"], "abc") + assert not re.fullmatch(rules["number"]["regex-rule"], "") + assert not re.fullmatch(rules["number"]["regex-rule"], "123.45abc") + def test_location_rule(rules): - assert re.fullmatch(rules['location']['regex-rule'], 'City, Country') - assert re.fullmatch(rules['location']['regex-rule'], 'City, Country, State') - assert not re.fullmatch(rules['location']['regex-rule'], 'City Country') - assert not re.fullmatch(rules['location']['regex-rule'], 'City,') - assert not re.fullmatch(rules['location']['regex-rule'], ',Country') + assert re.fullmatch(rules["location"]["regex-rule"], "City, Country") + assert re.fullmatch(rules["location"]["regex-rule"], "City, Country, State") + assert not re.fullmatch(rules["location"]["regex-rule"], "City Country") + assert not re.fullmatch(rules["location"]["regex-rule"], "City,") + assert not re.fullmatch(rules["location"]["regex-rule"], ",Country") + def test_latitude_image_rule(rules): - assert re.fullmatch(rules['latitude-image']['regex-rule'], '+12.345678') - assert re.fullmatch(rules['latitude-image']['regex-rule'], '-12.345678') - assert not re.fullmatch(rules['latitude-image']['regex-rule'], '123.45') - assert not re.fullmatch(rules['latitude-image']['regex-rule'], '+123.456789') - assert not re.fullmatch(rules['latitude-image']['regex-rule'], '-123.456789') + assert re.fullmatch(rules["latitude-image"]["regex-rule"], "+12.345678") + assert re.fullmatch(rules["latitude-image"]["regex-rule"], "-12.345678") + assert not re.fullmatch(rules["latitude-image"]["regex-rule"], "123.45") + assert not re.fullmatch(rules["latitude-image"]["regex-rule"], "+123.456789") + assert not re.fullmatch(rules["latitude-image"]["regex-rule"], "-123.456789") + def test_longitude_image_rule(rules): - assert re.fullmatch(rules['longitude-image']['regex-rule'], '+123.45678') - assert re.fullmatch(rules['longitude-image']['regex-rule'], '-123.45678') - assert not re.fullmatch(rules['longitude-image']['regex-rule'], '123') - assert not re.fullmatch(rules['longitude-image']['regex-rule'], '+1234.56789') - assert not re.fullmatch(rules['longitude-image']['regex-rule'], '-1234.56789') + assert re.fullmatch(rules["longitude-image"]["regex-rule"], "+123.45678") + assert re.fullmatch(rules["longitude-image"]["regex-rule"], "-123.45678") + assert not re.fullmatch(rules["longitude-image"]["regex-rule"], "123") + assert not re.fullmatch(rules["longitude-image"]["regex-rule"], "+1234.56789") + assert not re.fullmatch(rules["longitude-image"]["regex-rule"], "-1234.56789") + def test_title_rule(rules): - assert re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0.png') - assert re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0.jpg') - assert not re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0.txt') - assert not re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.png') - assert not re.fullmatch(rules['title']['regex-rule'], 'prefix_suffix_2022_v1.0') + assert re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0.png") + assert re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0.jpg") + assert not re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0.txt") + assert not re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.png") + assert not re.fullmatch(rules["title"]["regex-rule"], "prefix_suffix_2022_v1.0") + def test_title_data_product_rule(rules): - assert re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_plot_v1.0.png') - assert re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_photo_v1.0.jpg') - assert not re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_v1.0.txt') - assert not re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_plot_v1.png') - assert not re.fullmatch(rules['title-data-product']['regex-rule'], 'prefix_suffix_2022_plot_v1.0') + assert re.fullmatch( + rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_plot_v1.0.png" + ) + assert re.fullmatch( + rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_photo_v1.0.jpg" + ) + assert not re.fullmatch( + rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_v1.0.txt" + ) + assert not re.fullmatch( + rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_plot_v1.png" + ) + assert not re.fullmatch( + rules["title-data-product"]["regex-rule"], "prefix_suffix_2022_plot_v1.0" + ) + def test_name_format_rule(rules): - assert re.fullmatch(rules['name-format']['regex-rule'], 'Last, First M.') - assert re.fullmatch(rules['name-format']['regex-rule'], 'Last, First') - assert not re.fullmatch(rules['name-format']['regex-rule'], 'First Last') - assert not re.fullmatch(rules['name-format']['regex-rule'], 'Last, First, M.') - assert not re.fullmatch(rules['name-format']['regex-rule'], 'Last First M.') + assert re.fullmatch(rules["name-format"]["regex-rule"], "Last, First M.") + assert re.fullmatch(rules["name-format"]["regex-rule"], "Last, First") + assert not re.fullmatch(rules["name-format"]["regex-rule"], "First Last") + assert not re.fullmatch(rules["name-format"]["regex-rule"], "Last, First, M.") + assert not re.fullmatch(rules["name-format"]["regex-rule"], "Last First M.") + def test_name_characters_rule(rules): - assert re.fullmatch(rules['name-characters']['regex-rule'], 'John_Doe') - assert re.fullmatch(rules['name-characters']['regex-rule'], 'John-Doe') - assert not re.fullmatch(rules['name-characters']['regex-rule'], 'John Doe!') - assert not re.fullmatch(rules['name-characters']['regex-rule'], 'John Doe@') - assert not re.fullmatch(rules['name-characters']['regex-rule'], 'John Doe#') + assert re.fullmatch(rules["name-characters"]["regex-rule"], "John_Doe") + assert re.fullmatch(rules["name-characters"]["regex-rule"], "John-Doe") + assert not re.fullmatch(rules["name-characters"]["regex-rule"], "John Doe!") + assert not re.fullmatch(rules["name-characters"]["regex-rule"], "John Doe@") + assert not re.fullmatch(rules["name-characters"]["regex-rule"], "John Doe#") + def test_altitude_image_warning_rule(rules): - assert re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123 m') - assert re.fullmatch(rules['altitude-image-warning']['regex-rule'], '-123 m') - assert not re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123.45 m') - assert not re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123') - assert not re.fullmatch(rules['altitude-image-warning']['regex-rule'], '123m') + assert re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123 m") + assert re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "-123 m") + assert not re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123.45 m") + assert not re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123") + assert not re.fullmatch(rules["altitude-image-warning"]["regex-rule"], "123m") + def test_altitude_image_rule(rules): - assert re.fullmatch(rules['altitude-image']['regex-rule'], '123.45 m') - assert re.fullmatch(rules['altitude-image']['regex-rule'], '-123.45 m') - assert not re.fullmatch(rules['altitude-image']['regex-rule'], '123') - assert not re.fullmatch(rules['altitude-image']['regex-rule'], '123.45') - assert not re.fullmatch(rules['altitude-image']['regex-rule'], '123.45m') + assert re.fullmatch(rules["altitude-image"]["regex-rule"], "123.45 m") + assert re.fullmatch(rules["altitude-image"]["regex-rule"], "-123.45 m") + assert not re.fullmatch(rules["altitude-image"]["regex-rule"], "123") + assert not re.fullmatch(rules["altitude-image"]["regex-rule"], "123.45") + assert not re.fullmatch(rules["altitude-image"]["regex-rule"], "123.45m") + def test_ncas_email_rule(rules): - assert re.fullmatch(rules['ncas-email']['regex-rule'], 'test@ncas.ac.uk') - assert re.fullmatch(rules['ncas-email']['regex-rule'], 'test.test@ncas.ac.uk') - assert not re.fullmatch(rules['ncas-email']['regex-rule'], 'test@example.com') - assert not re.fullmatch(rules['ncas-email']['regex-rule'], 'test@ncas.com') - assert not re.fullmatch(rules['ncas-email']['regex-rule'], 'test@ncas.ac') + assert re.fullmatch(rules["ncas-email"]["regex-rule"], "test@ncas.ac.uk") + assert re.fullmatch(rules["ncas-email"]["regex-rule"], "test.test@ncas.ac.uk") + assert not re.fullmatch(rules["ncas-email"]["regex-rule"], "test@example.com") + assert not re.fullmatch(rules["ncas-email"]["regex-rule"], "test@ncas.com") + assert not re.fullmatch(rules["ncas-email"]["regex-rule"], "test@ncas.ac") + def test_map_type_rule(): - assert r._map_type_rule('number') == Number - assert r._map_type_rule('integer') == int - assert r._map_type_rule('int') == int - assert r._map_type_rule('float') == float - assert r._map_type_rule('string') == str - assert r._map_type_rule('str') == str + assert r._map_type_rule("number") == Number + assert r._map_type_rule("integer") == int + assert r._map_type_rule("int") == int + assert r._map_type_rule("float") == float + assert r._map_type_rule("string") == str + assert r._map_type_rule("str") == str with pytest.raises(KeyError): - r._map_type_rule('nonexistent') + r._map_type_rule("nonexistent") + def test_check(): rules_instance = r # Test that the function correctly handles rule-func - assert rules_instance.check("rule-func:string_of_length:3", "abc", {}, label="Test") == ([], []) - assert rules_instance.check("rule-func:string_of_length:3", "abcd", {}, label="Test") == (["Test 'abcd' must be exactly 3 characters"], []) + assert rules_instance.check( + "rule-func:string_of_length:3", "abc", {}, label="Test" + ) == ([], []) + assert rules_instance.check( + "rule-func:string_of_length:3", "abcd", {}, label="Test" + ) == (["Test 'abcd' must be exactly 3 characters"], []) # Test that the function correctly handles rule-func-warning - assert rules_instance.check("rule-func-warning:string_of_length:3", "abc", {}, label="Test") == ([], []) - assert rules_instance.check("rule-func-warning:string_of_length:3", "abcd", {}, label="Test") == ([], ["Test 'abcd' must be exactly 3 characters"]) + assert rules_instance.check( + "rule-func-warning:string_of_length:3", "abc", {}, label="Test" + ) == ([], []) + assert rules_instance.check( + "rule-func-warning:string_of_length:3", "abcd", {}, label="Test" + ) == ([], ["Test 'abcd' must be exactly 3 characters"]) # Test that the function correctly handles type-rule assert rules_instance.check("type-rule:int", 123, {}, label="Test") == ([], []) - assert rules_instance.check("type-rule:int", "abc", {}, label="Test") == (["Test Value 'abc' is not of required type: 'int'."], []) + assert rules_instance.check("type-rule:int", "abc", {}, label="Test") == ( + ["Test Value 'abc' is not of required type: 'int'."], + [], + ) # Test that the function correctly handles regex-warning - assert rules_instance.check("regex-warning:^[a-z]+$", "abc", {}, label="Test") == ([], []) - assert rules_instance.check("regex-warning:^[a-z]+$", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."]) + assert rules_instance.check("regex-warning:^[a-z]+$", "abc", {}, label="Test") == ( + [], + [], + ) + assert rules_instance.check("regex-warning:^[a-z]+$", "ABC", {}, label="Test") == ( + [], + ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], + ) # Test that the function correctly handles regex assert rules_instance.check("regex:^[a-z]+$", "abc", {}, label="Test") == ([], []) - assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], []) + assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == ( + ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], + [], + ) # Test that the function correctly handles regex-rule-warning - assert rules_instance.check("regex-rule-warning:integer", "123", {}, label="Test") == ([], []) - assert rules_instance.check("regex-rule-warning:integer", "123.45", {}, label="Test") == ([], ["Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'."]) + assert rules_instance.check( + "regex-rule-warning:integer", "123", {}, label="Test" + ) == ([], []) + assert rules_instance.check( + "regex-rule-warning:integer", "123.45", {}, label="Test" + ) == ( + [], + [ + "Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'." + ], + ) # Test that the function correctly handles regex-rule - assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ([], []) - assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (["Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'."], []) + assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ( + [], + [], + ) + assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == ( + [ + "Test Value '123.45' does not match regex rule: 'integer' - Example valid value '10'." + ], + [], + ) # Test that correct exceptions are raised when the rule or regex is not found with pytest.raises(Exception) as e_info: @@ -512,4 +765,7 @@ def test_check(): with pytest.raises(Exception) as e_info: rules_instance.check("regex-rule:nonexistent", "abc", {}, label="Test") - assert str(e_info.value) == "Regex rule not found with rule ID: regex-rule:nonexistent." + assert ( + str(e_info.value) + == "Regex rule not found with rule ID: regex-rule:nonexistent." + ) From 9621b929ab3706b37df7ca9cc992d40fe687a542 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 1 Oct 2024 11:53:38 +0100 Subject: [PATCH 5/5] Change output to use original value in error messages (needed for python 3.10 and earlier) --- checksit/rules/rule_funcs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index cf714cd..d2dc95d 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -394,6 +394,7 @@ def check_utc_date_iso_format(value, context, extras=None, label=""): """ errors = [] + original_value = value if sys.version_info < (3,11): # python datetime changed its recognition of ISO format from 3.11 onward if value.endswith("Z"): value = value.replace("Z", "+00:00") @@ -402,9 +403,9 @@ def check_utc_date_iso_format(value, context, extras=None, label=""): try: dt = datetime.fromisoformat(value) if (dt.utcoffset() != None) and (dt.utcoffset().total_seconds() != 0): - errors.append(f"{label} Date string '{value}' not in UTC.") + errors.append(f"{label} Date string '{original_value}' not in UTC.") except ValueError: - errors.append(f"{label} Date string '{value}' not in ISO 8601 format.") + errors.append(f"{label} Date string '{original_value}' not in ISO 8601 format.") except: raise