diff --git a/checksit/generic.py b/checksit/generic.py index d5738de..1fd31df 100644 --- a/checksit/generic.py +++ b/checksit/generic.py @@ -618,7 +618,7 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten != [] ): errors.append( - f"[file name]: Unknown field '{key}' in vocab {field}" + f"[file name]: Unknown field '{key}' in vocab {field}." ) if spec_verb: print(errors[-1]) @@ -650,7 +650,7 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten print(f"Date string {key} matches the required format") else: errors.append( - f"[file name]: Invalid date/time string '{key}'" + f"[file name]: Invalid date/time string '{key}'. Date/time should take the form YYYY[MM[DD[HH[MM[SS]]]]], where the fields in brackets are optional." ) if spec_verb: print(errors[-1]) @@ -663,7 +663,7 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten print(f"File version {key} matches the required format") else: errors.append( - f"[file name]: Invalid file version '{key}'" + f"[file name]: Invalid file version '{key}'. File versions should take the form n{{1,}}[.n{{1,}}]." ) if spec_verb: print(errors[-1]) @@ -671,7 +671,7 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten else: # FIELD NOT RECOGNISED errors.append( - f"[file name]: {field} field type not recognised" + f"[file name]: {field} field type not recognised." ) if spec_verb: print(errors[-1]) diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-add-seg.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-add-seg.yml new file mode 100644 index 0000000..a0d2e7f --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-add-seg.yml @@ -0,0 +1,28 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # ESACCI + field00: __vocabs__:esa-cci-file-name-config:field00 + # CCI Project (e.g. 
SEAICE) + field01: __vocabs__:esa-cci-file-name-config:field01 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field03: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Additional segregator (also stored in the 'product' vocabulary) + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Date and time + field06: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # File version + field07: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml new file mode 100644 index 0000000..ce8269b --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml @@ -0,0 +1,30 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # Date and time + field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # ESACCI + field01: __vocabs__:esa-cci-file-name-config:field00 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # CCI Project (e.g. SEAICE) + field03: __vocabs__:esa-cci-file-name-config:field01 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Additional segregator (also stored in the 'product' vocabulary) + field06: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # GDS version + field07: __version__:^v\d?\d.?\d?\d?$ + # File version + field08: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml new file mode 100644 index 0000000..606d262 --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml @@ -0,0 +1,28 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # Date and time + field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # ESACCI + field01: __vocabs__:esa-cci-file-name-config:field00 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # CCI Project (e.g. SEAICE) + field03: __vocabs__:esa-cci-file-name-config:field01 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Additional segregator (also stored in the 'product' vocabulary) + field06: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # File version + field07: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-gds.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-gds.yml new file mode 100644 index 0000000..6941f2d --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-gds.yml @@ -0,0 +1,28 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # Date and time + field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # ESACCI + field01: __vocabs__:esa-cci-file-name-config:field00 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # CCI Project (e.g. SEAICE) + field03: __vocabs__:esa-cci-file-name-config:field01 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # GDS version + field06: __version__:^v\d?\d.?\d?\d?$ + # File version + field07: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-std.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-std.yml new file mode 100644 index 0000000..f042cc4 --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-std.yml @@ -0,0 +1,26 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # Date and time + field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # ESACCI + field01: __vocabs__:esa-cci-file-name-config:field00 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # CCI Project (e.g. SEAICE) + field03: __vocabs__:esa-cci-file-name-config:field01 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # File version + field06: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/tests/test_generic.py b/tests/test_generic.py index be4501a..c41e314 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -464,7 +464,7 @@ def test_check_file_name(): assert warnings == [] def test_check_generic_file_name(): - # Test that the function correctly identifies invalid instrument name + # Test for Standard ESA CCI file name vocab_checks = { 'field00': '__vocabs__:esa-cci-file-name-config:field00', 'field01': '__vocabs__:esa-cci-file-name-config:field01', @@ -490,7 +490,7 @@ def test_check_generic_file_name(): # Incorrect field00 file_name = "ESAC3S-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.1.nc" errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension) - assert errors == ["[file name]: Unknown field 'ESAC3S' in vocab __vocabs__:esa-cci-file-name-config:field00"] + assert errors == ["[file name]: Unknown field 'ESAC3S' in vocab __vocabs__:esa-cci-file-name-config:field00."] assert warnings == [] # Incorrect multiple fields @@ -502,13 +502,37 @@ def test_check_generic_file_name(): # Incorrect date file_name = "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231241000000-fv09.1.nc" errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension) - assert errors == ["[file name]: Invalid date/time string '20231241000000'"] + assert errors == ["[file name]: Invalid date/time string '20231241000000'. 
Date/time should take the form YYYY[MM[DD[HH[MM[SS]]]]], where the fields in brackets are optional."] assert warnings == [] # Incorrect version format file_name = "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.2.1.nc" errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension) - assert errors == ["[file name]: Invalid file version 'fv09.2.1'"] + assert errors == ["[file name]: Invalid file version 'fv09.2.1'. File versions should take the form n{1,}[.n{1,}]."] + assert warnings == [] + + # Test for Additional Segregator ESA CCI file name + vocab_checks = { + 'field00': '__vocabs__:esa-cci-file-name-config:field00', + 'field01': '__vocabs__:esa-cci-file-name-config:field01', + 'field02': '__vocabs__:esa-cci-file-name-config:field02', + 'field03': '__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json', + 'field04': '__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json', + 'field05': '__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json', + 'field06': '__date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S', + 'field07': '__version__:^fv\\d?\\d.?\\d?\\d?$' + } + segregator = { + 'seg': '-' + } + extension = { + 'ext': '.nc' + } + + # Additional Segregator ESA CCI file name with a segregator that is not in the product vocabulary - should report an unknown field error + file_name = "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-TEST_ADD_SEG-20231231000000-fv09.1.nc" + errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension) + assert errors == ["[file name]: Unknown field 'TEST_ADD_SEG' in vocab __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json."] assert warnings == [] def test_check_radar_moment_variables():