Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving readability of errors #39

Merged
merged 9 commits into from
Mar 19, 2024
18 changes: 9 additions & 9 deletions checksit/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,8 @@ def check_var(dct, variable, defined_attrs, attr_rules=[], skip_spellcheck=False
# print(dct["variables"][variable].get(attr_key))
elif not str(dct["variables"][variable].get(attr_key)) == attr_value:
errors.append(
f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition {attr_value}, "
f"not {dct['variables'][variable].get(attr_key).encode('unicode_escape').decode('utf-8')}."
f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition '{attr_value}', "
f"not '{dct['variables'][variable].get(attr_key).encode('unicode_escape').decode('utf-8')}'."
)
for rule_to_check in attr_rules:
if rule_to_check == "rule-func:check-qc-flags":
Expand Down Expand Up @@ -260,8 +260,8 @@ def check_var(dct, variable, defined_attrs, attr_rules=[], skip_spellcheck=False
pass
elif not dct["variables"][variable].get(attr_key) == attr_value:
errors.append(
f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition {attr_value}, "
f"not {dct['variables'][variable].get(attr_key)}."
f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition '{attr_value}', "
f"not '{dct['variables'][variable].get(attr_key)}'."
)

return errors, warnings
Expand All @@ -282,7 +282,7 @@ def check_file_name(file_name, vocab_checks=None, rule_checks=None, **kwargs):
# check instrument name
if "instrument" in vocab_checks.keys():
if vocabs.check(vocab_checks["instrument"], file_name_parts[0], label="_") != []:
errors.append(f"[file name]: Invalid file name format - unknown instrument {file_name_parts[0]}")
errors.append(f"[file name]: Invalid file name format - unknown instrument '{file_name_parts[0]}'")
else:
msg = "No instrument vocab defined in specs"
raise KeyError(msg)
Expand All @@ -303,7 +303,7 @@ def check_file_name(file_name, vocab_checks=None, rule_checks=None, **kwargs):
# could be yyyy, yyyymm, yyyymmdd, yyyymmdd-HH, yyyymmdd-HHMM, yyyymmdd-HHMMSS
# first checks format, then date validity
if not date_regex.match(file_name_parts[2]):
errors.append(f"[file name]: Invalid file name format - bad date format {file_name_parts[2]}")
errors.append(f"[file name]: Invalid file name format - bad date format '{file_name_parts[2]}'")
else:
fmts = ("%Y", "%Y%m", "%Y%m%d", "%Y%m%d-%H", "%Y%m%d-%H%M", "%Y%m%d-%H%M%S")
valid_date_found = False
Expand All @@ -315,20 +315,20 @@ def check_file_name(file_name, vocab_checks=None, rule_checks=None, **kwargs):
except ValueError:
pass
if not valid_date_found:
errors.append(f"[file name]: Invalid file name format - invalid date in file name {file_name_parts[2]}")
errors.append(f"[file name]: Invalid file name format - invalid date in file name '{file_name_parts[2]}'")

# check data product
if "data_product" in vocab_checks.keys():
if vocabs.check(vocab_checks["data_product"], file_name_parts[3], label="_") != []:
errors.append(f"[file name]: Invalid file name format - unknown data product {file_name_parts[3]}")
errors.append(f"[file name]: Invalid file name format - unknown data product '{file_name_parts[3]}'")
else:
msg = "No data product vocab defined in specs"
raise KeyError(msg)

# check version number format
version_component = file_name_parts[-1].split(".nc")[0]
if not re.match(r"^v\d.\d$", version_component):
errors.append(f"[file name]: Invalid file name format - incorrect file version number {version_component}")
errors.append(f"[file name]: Invalid file name format - incorrect file version number '{version_component}'")

# check number of options - max length of splitted file name
if len(file_name_parts) > 8:
Expand Down
6 changes: 3 additions & 3 deletions checksit/rules/rule_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ def ncas_platform(value, context, extras=None, label=""):

if value not in ncas_platforms:
errors.append(f"{label} '{value}' is not a valid NCAS platform")

return errors


def check_qc_flags(value, context, extras=None, label=""):
"""
A function to check flag_values and flag_meanings
Expand All @@ -315,7 +315,7 @@ def check_qc_flags(value, context, extras=None, label=""):

# check flag_values are correctly formatted (should be array of bytes)
if not (isinstance(value, np.ndarray) or isinstance(value, tuple)):
errors.append(f"{label} QC flag_values must be an array or tuple of byte values, not {type(value)}.")
errors.append(f"{label} QC flag_values must be an array or tuple of byte values, not '{type(value)}'.")

# check there are at least two values and they start with 0 and 1
if not len(value) > 2:
Expand Down
95 changes: 74 additions & 21 deletions checksit/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,78 @@ def __init__(self):
"(not applicable)|(not available)"

self.static_regex_rules = {
"integer": r"-?\d+",
"valid-email": r"[^@\s]+@[^@\s]+\.[^\s@]+",
"valid-url": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?",
"valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?)|" + _NOT_APPLICABLE_RULES,
"match:vN.M": r"v\d\.\d",
"datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
"datetime-or-na":
r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?)|" + _NOT_APPLICABLE_RULES,
"number": r"-?\d+(\.\d+)?",
"location": r'(.)+(\,\ )(.)+',
"latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}',
"longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}',
"title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))',
"name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+',
"altitude-image-warning": r'-?\d+\sm', # should be integers only for images
"altitude-image": r'-?\d+(\.\d+)?\sm',
"ncas-email": r'[^@\s]+@ncas.ac.uk'
"integer": {
"regex-rule": r"-?\d+",
"example": "10"
},
"valid-email": {
"regex-rule": r"[^@\s]+@[^@\s]+\.[^\s@]+",
"example": "[email protected]"
},
"valid-url": {
"regex-rule": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?",
"example": "https://github.com"
},
"valid-url-or-na": {
"regex-rule": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?)|" + _NOT_APPLICABLE_RULES,
"example": "https://github.com"
},
"match:vN.M": {
"regex-rule": r"v\d\.\d",
"example": "v1.0"
},
"datetime": {
"regex-rule": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
"example": "2023-11-17T15:00:00"
},
"datetime-or-na": {
"regex-rule": r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?)|" + _NOT_APPLICABLE_RULES,
"example": "2023-11-17T15:00:00"
},
"number": {
"regex-rule": r"-?\d+(\.\d+)?",
"example": "10.5"
},
"location": {
"regex-rule": r'(.)+(\,\ )(.)+',
"example": "Chilbolton Atmospheric Observatory, Chilbolton, Hampshire, UK"
},
"latitude-image": {
"regex-rule": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}',
"example": "12.345678"
},
"longitude-image": {
"regex-rule": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}',
"example": "123.456789"
},
"title": {
"regex-rule": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"example": "ncas-cam-9_cao_20210623-215001_v1.0.jpg"
},
"title-data-product": {
"regex-rule": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"example": "ncas-cam-9_cao_20210623-215001_photo_v1.0.jpg"
},
"name-format": {
"regex-rule": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))',
"example": "Jones, Sam"
},
"name-characters": {
"regex-rule": r'[A-Za-z_À-ÿ\-\'\ \.\,]+',
"example": "Jones, Sam"
},
"altitude-image-warning": {
"regex-rule": r'-?\d+\sm', # should be integers only for images
"example": "123 m"
},
"altitude-image": {
"regex-rule": r'-?\d+(\.\d+)?\sm',
"example": "123.45 m"
},
"ncas-email": {
"regex-rule": r'[^@\s]+@ncas.ac.uk',
"example": "[email protected]"
}
}

def _map_type_rule(self, type_rule):
Expand Down Expand Up @@ -87,10 +140,10 @@ def check(self, rule_lookup, value, context=None, label=""):
regex_rule = i.split(":", 1)[1]

if regex_rule in self.static_regex_rules:
pattern = self.static_regex_rules[regex_rule]
pattern = self.static_regex_rules[regex_rule]["regex-rule"]

if not re.match("^" + pattern + "$", value):
output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}' - Example valid value '{self.static_regex_rules[regex_rule]['example']}'.")

else:
raise Exception(f"Regex rule not found with rule ID: {i}.")
Expand Down
16 changes: 8 additions & 8 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,13 @@ def test_check_var():
variable = "var2"
defined_attrs = ["long_name:Variable 2", "units:s"]
errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."]
assert errors == ["[variable**************:var2]: Attribute 'units' must have definition 's', not 'kg'."]
assert warnings == []

variable = "var2:__OPTIONAL__"
defined_attrs = ["long_name:Variable 2", "units:s"]
errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."]
assert errors == ["[variable**************:var2]: Attribute 'units' must have definition 's', not 'kg'."]
assert warnings == []

# Test that the function correctly handles badly formatted flag_values
Expand Down Expand Up @@ -354,7 +354,7 @@ def test_check_file_name():
}
file_name = "inst3_plat1_20220101_prod1_v1.0.nc"
errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks)
assert errors == ["[file name]: Invalid file name format - unknown instrument inst3"]
assert errors == ["[file name]: Invalid file name format - unknown instrument 'inst3'"]
assert warnings == []

# Test that the function correctly identifies invalid platform name
Expand All @@ -366,25 +366,25 @@ def test_check_file_name():
# Test that the function correctly identifies invalid date format
file_name = "inst1_plat1_2022010_prod1_v1.0.nc"
errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks)
assert errors == ["[file name]: Invalid file name format - bad date format 2022010"]
assert errors == ["[file name]: Invalid file name format - bad date format '2022010'"]
assert warnings == []

# Test that the function correctly identifies invalid date
file_name = "inst1_plat1_20221301_prod1_v1.0.nc"
errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks)
assert errors == ["[file name]: Invalid file name format - invalid date in file name 20221301"]
assert errors == ["[file name]: Invalid file name format - invalid date in file name '20221301'"]
assert warnings == []

# Test that the function correctly identifies invalid data product
file_name = "inst1_plat1_20220101_prod3_v1.0.nc"
errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks)
assert errors == ["[file name]: Invalid file name format - unknown data product prod3"]
assert errors == ["[file name]: Invalid file name format - unknown data product 'prod3'"]
assert warnings == []

# Test that the function correctly identifies invalid version number format
file_name = "inst1_plat1_20220101_prod1_v10.nc"
errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks)
assert errors == ["[file name]: Invalid file name format - incorrect file version number v10"]
assert errors == ["[file name]: Invalid file name format - incorrect file version number 'v10'"]
assert warnings == []

# Test that the function correctly identifies too many options in file name
Expand All @@ -396,7 +396,7 @@ def test_check_file_name():
# Test that the function correctly handles multiple errors
file_name = "inst3_plat3_20220101_prod1_v1.0.nc"
errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks)
assert errors == ["[file name]: Invalid file name format - unknown instrument inst3","[file name]: Invalid file name format - 'plat3' must be one of: '['plat1', 'plat2']'"]
assert errors == ["[file name]: Invalid file name format - unknown instrument 'inst3'","[file name]: Invalid file name format - 'plat3' must be one of: '['plat1', 'plat2']'"]
assert warnings == []

# Test that the function correctly handles valid file names
Expand Down
2 changes: 1 addition & 1 deletion tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_other_plot_checks(plot, error_level, number_errors):
("nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png",
(
"[global-attributes:******:XMP-dc:Title]*** Value 'nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png'"
" does not match regex rule: 'title'."
" does not match regex rule: 'title' - Example valid value 'ncas-cam-9_cao_20210623-215001_v1.0.jpg'."
),
),
("nerc-mstrf-met-sensors_capel-dewi_20160906_campbell-sci.png",
Expand Down
Loading
Loading