Skip to content

Commit

Permalink
Added new ESA CCI filename specs files. Added more verbose error mess…
Browse files Browse the repository at this point in the history
…ages to generic.py and updated test_generic.
  • Loading branch information
knappett committed Dec 11, 2024
1 parent f0ef22c commit 0a47805
Show file tree
Hide file tree
Showing 7 changed files with 172 additions and 8 deletions.
8 changes: 4 additions & 4 deletions checksit/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten
!= []
):
errors.append(
f"[file name]: Unknown field '{key}' in vocab {field}"
f"[file name]: Unknown field '{key}' in vocab {field}."
)
if spec_verb:
print(errors[-1])
Expand Down Expand Up @@ -650,7 +650,7 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten
print(f"Date string {key} matches the required format")
else:
errors.append(
f"[file name]: Invalid date/time string '{key}'"
f"[file name]: Invalid date/time string '{key}'. Date/time should take the form YYYY[MM[DD[HH[MM[SS]]]]], where the fields in brackets are optional."
)
if spec_verb:
print(errors[-1])
Expand All @@ -663,15 +663,15 @@ def check_generic_file_name(file_name, vocab_checks=None, segregator=None, exten
print(f"File version {key} matches the required format")
else:
errors.append(
f"[file name]: Invalid file version '{key}'"
f"[file name]: Invalid file version '{key}'. File versions should take the form n{{1,}}[.n{{1,}}]."
)
if spec_verb:
print(errors[-1])

else:
# FIELD NOT RECOGNISED
errors.append(
f"[file name]: {field} field type not recognised"
f"[file name]: {field} field type not recognised."
)
if spec_verb:
print(errors[-1])
Expand Down
28 changes: 28 additions & 0 deletions specs/groups/esa-cci-v1.0/esa-cci-file-name-add-seg.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# File-name spec for ESA CCI file names that carry an additional segregator
# field between the product string and the date/time field.
# The file name is split on `seg`, and each part is checked against the
# correspondingly numbered entry in `vocab_checks`.
file-name-format:
  func: checksit.generic.check_generic_file_name
  params:
    vocab_checks:
      # ESACCI
      field00: __vocabs__:esa-cci-file-name-config:field00
      # CCI Project (e.g. SEAICE)
      field01: __vocabs__:esa-cci-file-name-config:field01
      # Processing Level (e.g. L3C)
      field02: __vocabs__:esa-cci-file-name-config:field02
      # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary
      # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      field03: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      # Product String (e.g. NIMBUS5_ESMR-EASE2_NH)
      # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # Additional segregator (also stored in the 'product' vocabulary)
      field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # Date and time: YYYY[MM[DD[HH[MM[SS]]]]]
      field06: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S
      # File version: 'fv', one or two digits, an optional literal '.',
      # then up to two more digits. The dot is escaped so that it cannot
      # match an arbitrary character (e.g. 'fv09x1').
      field07: __version__:^fv\d?\d\.?\d?\d?$
    segregator:
      seg: '-'
    extension:
      ext: '.nc'
    spec_verbose:
      spec_verb: True
30 changes: 30 additions & 0 deletions specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# File-name spec for GHRSST-style ESA CCI file names (date/time first) that
# carry an additional segregator field and a GDS version field.
# The file name is split on `seg`, and each part is checked against the
# correspondingly numbered entry in `vocab_checks`.
file-name-format:
  func: checksit.generic.check_generic_file_name
  params:
    vocab_checks:
      # Date and time: YYYY[MM[DD[HH[MM[SS]]]]]
      field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S
      # ESACCI
      field01: __vocabs__:esa-cci-file-name-config:field00
      # Processing Level (e.g. L3C)
      field02: __vocabs__:esa-cci-file-name-config:field02
      # CCI Project (e.g. SEAICE)
      field03: __vocabs__:esa-cci-file-name-config:field01
      # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary
      # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      # Product String (e.g. NIMBUS5_ESMR-EASE2_NH)
      # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # Additional segregator (also stored in the 'product' vocabulary)
      field06: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # GDS version: 'v', one or two digits, an optional literal '.',
      # then up to two more digits. The dot is escaped so that it cannot
      # match an arbitrary character.
      field07: __version__:^v\d?\d\.?\d?\d?$
      # File version: same shape as the GDS version but prefixed with 'fv'.
      field08: __version__:^fv\d?\d\.?\d?\d?$
    segregator:
      seg: '-'
    extension:
      ext: '.nc'
    spec_verbose:
      spec_verb: True
28 changes: 28 additions & 0 deletions specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# File-name spec for GHRSST-style ESA CCI file names (date/time first) that
# carry an additional segregator field.
# The file name is split on `seg`, and each part is checked against the
# correspondingly numbered entry in `vocab_checks`.
file-name-format:
  func: checksit.generic.check_generic_file_name
  params:
    vocab_checks:
      # Date and time: YYYY[MM[DD[HH[MM[SS]]]]]
      field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S
      # ESACCI
      field01: __vocabs__:esa-cci-file-name-config:field00
      # Processing Level (e.g. L3C)
      field02: __vocabs__:esa-cci-file-name-config:field02
      # CCI Project (e.g. SEAICE)
      field03: __vocabs__:esa-cci-file-name-config:field01
      # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary
      # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      # Product String (e.g. NIMBUS5_ESMR-EASE2_NH)
      # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # Additional segregator (also stored in the 'product' vocabulary)
      field06: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # File version: 'fv', one or two digits, an optional literal '.',
      # then up to two more digits. The dot is escaped so that it cannot
      # match an arbitrary character (e.g. 'fv09x1').
      field07: __version__:^fv\d?\d\.?\d?\d?$
    segregator:
      seg: '-'
    extension:
      ext: '.nc'
    spec_verbose:
      spec_verb: True
28 changes: 28 additions & 0 deletions specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-gds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# File-name spec for GHRSST-style ESA CCI file names (date/time first) that
# carry a GDS version field but no additional segregator.
# The file name is split on `seg`, and each part is checked against the
# correspondingly numbered entry in `vocab_checks`, so the field keys are
# kept contiguous (field00..field07).
file-name-format:
  func: checksit.generic.check_generic_file_name
  params:
    vocab_checks:
      # Date and time: YYYY[MM[DD[HH[MM[SS]]]]]
      field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S
      # ESACCI
      field01: __vocabs__:esa-cci-file-name-config:field00
      # Processing Level (e.g. L3C)
      field02: __vocabs__:esa-cci-file-name-config:field02
      # CCI Project (e.g. SEAICE)
      field03: __vocabs__:esa-cci-file-name-config:field01
      # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary
      # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      # Product String (e.g. NIMBUS5_ESMR-EASE2_NH)
      # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # GDS version: 'v', one or two digits, an optional literal '.',
      # then up to two more digits. The dot is escaped so that it cannot
      # match an arbitrary character.
      field06: __version__:^v\d?\d\.?\d?\d?$
      # File version: same shape as the GDS version but prefixed with 'fv'.
      field07: __version__:^fv\d?\d\.?\d?\d?$
    segregator:
      seg: '-'
    extension:
      ext: '.nc'
    spec_verbose:
      spec_verb: True
26 changes: 26 additions & 0 deletions specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-std.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# File-name spec for standard GHRSST-style ESA CCI file names (date/time
# first, no additional segregator, no GDS version).
# The file name is split on `seg`, and each part is checked against the
# correspondingly numbered entry in `vocab_checks`.
file-name-format:
  func: checksit.generic.check_generic_file_name
  params:
    vocab_checks:
      # Date and time: YYYY[MM[DD[HH[MM[SS]]]]]
      field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S
      # ESACCI
      field01: __vocabs__:esa-cci-file-name-config:field00
      # Processing Level (e.g. L3C)
      field02: __vocabs__:esa-cci-file-name-config:field02
      # CCI Project (e.g. SEAICE)
      field03: __vocabs__:esa-cci-file-name-config:field01
      # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary
      # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      # Product String (e.g. NIMBUS5_ESMR-EASE2_NH)
      # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # File version: 'fv', one or two digits, an optional literal '.',
      # then up to two more digits. The dot is escaped so that it cannot
      # match an arbitrary character (e.g. 'fv09x1').
      field06: __version__:^fv\d?\d\.?\d?\d?$
    segregator:
      seg: '-'
    extension:
      ext: '.nc'
    spec_verbose:
      spec_verb: True
32 changes: 28 additions & 4 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def test_check_file_name():
assert warnings == []

def test_check_generic_file_name():
# Test that the function correctly identifies invalid instrument name
# Test for Standard ESA CCI file name
vocab_checks = {
'field00': '__vocabs__:esa-cci-file-name-config:field00',
'field01': '__vocabs__:esa-cci-file-name-config:field01',
Expand All @@ -490,7 +490,7 @@ def test_check_generic_file_name():
# Incorrect field00
file_name = "ESAC3S-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.1.nc"
errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension)
assert errors == ["[file name]: Unknown field 'ESAC3S' in vocab __vocabs__:esa-cci-file-name-config:field00"]
assert errors == ["[file name]: Unknown field 'ESAC3S' in vocab __vocabs__:esa-cci-file-name-config:field00."]
assert warnings == []

# Incorrect multiple fields
Expand All @@ -502,13 +502,37 @@ def test_check_generic_file_name():
# Incorrect date
file_name = "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231241000000-fv09.1.nc"
errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension)
assert errors == ["[file name]: Invalid date/time string '20231241000000'"]
assert errors == ["[file name]: Invalid date/time string '20231241000000'. Date/time should take the form YYYY[MM[DD[HH[MM[SS]]]]], where the fields in brackets are optional."]
assert warnings == []

# Incorrect version format
file_name = "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.2.1.nc"
errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension)
assert errors == ["[file name]: Invalid file version 'fv09.2.1'"]
assert errors == ["[file name]: Invalid file version 'fv09.2.1'. File versions should take the form n{1,}[.n{1,}]."]
assert warnings == []

# Test for Additional Segregator ESA CCI file name
vocab_checks = {
'field00': '__vocabs__:esa-cci-file-name-config:field00',
'field01': '__vocabs__:esa-cci-file-name-config:field01',
'field02': '__vocabs__:esa-cci-file-name-config:field02',
'field03': '__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json',
'field04': '__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json',
'field05': '__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json',
'field06': '__date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S',
'field07': '__version__:^fv\\d?\\d.?\\d?\\d?$'
}
segregator = {
'seg': '-'
}
extension = {
'ext': '.nc'
}

# Additional Segregator ESA CCI file name - 'TEST_ADD_SEG' is not in the product vocabulary, so an unknown-field error is expected
file_name = "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-TEST_ADD_SEG-20231231000000-fv09.1.nc"
errors, warnings = cg.check_generic_file_name(file_name, vocab_checks, segregator, extension)
assert errors == ["[file name]: Unknown field 'TEST_ADD_SEG' in vocab __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json."]
assert warnings == []

def test_check_radar_moment_variables():
Expand Down

0 comments on commit 0a47805

Please sign in to comment.