# NOTE(review): these three functions are methods of the vocabulary class in
# checksit/cvs.py (its header is outside the visible hunk); indent them one
# level when placing them in the class body.


def _load_from_url_github(self, vocab_id_url: str):
    """Download a vocabulary JSON document from a GitHub raw-content URL.

    When the URL contains a ``/__latest__/`` path segment, that placeholder
    is replaced by the last path segment of the URL that the repository's
    ``/releases/latest`` page resolves to.

    Args:
        vocab_id_url: URL of the vocabulary file.

    Returns:
        The decoded JSON document, or an empty list when the download does
        not return HTTP 200 (a warning is printed in that case).
    """
    vocab_list = []
    vocab_id_url_base = vocab_id_url.split("/__latest__")[0]
    vocab_id_url_base = vocab_id_url_base.replace(
        "raw.githubusercontent.com", "github.com"
    )
    if "/__latest__/" in vocab_id_url:
        latest_version = requests.get(
            f"{vocab_id_url_base}/releases/latest"
        ).url.split("/")[-1]
        vocab_id_url = vocab_id_url.replace("__latest__", latest_version)

    # The replace is a no-op when called through _load_from_url, which has
    # already expanded the "__URL__" prefix; kept for direct callers.
    res = requests.get(vocab_id_url.replace("__URL__", "https://"))
    if res.status_code != 200:
        print(f"[WARNING] Failed to load vocab: {vocab_id_url}")
        return vocab_list
    vocab_list = res.json()

    return vocab_list


def _load_from_url_esacci(self, vocab_id_url: str):
    """Download an ESA CCI vocabulary and reduce it to a sorted label list.

    URLs containing ``dataType`` yield the ``#altLabel`` values, URLs
    containing ``product`` yield the ``#prefLabel`` values.

    Args:
        vocab_id_url: URL of the vocabulary JSON document.

    Returns:
        Sorted list of labels, or an empty list when the download does not
        return HTTP 200 or the URL is neither a ``dataType`` nor a
        ``product`` vocabulary (a warning is printed in both cases).
    """
    vocab_list = []
    res = requests.get(vocab_id_url)
    if res.status_code != 200:
        print(f"[WARNING] Failed to load vocab: {vocab_id_url}")
        return vocab_list
    js = res.json()

    if "dataType" in vocab_id_url:
        label_suffix = "#altLabel"
    elif "product" in vocab_id_url:
        label_suffix = "#prefLabel"
    else:
        print(f"[WARNING] ESA CCI vocab url not recognised: {vocab_id_url}")
        return vocab_list

    # Each entry of the document is a mapping; the wanted label is the
    # "@value" of the first item stored under the key ending in the suffix.
    vocab_list = sorted(
        labels[0]["@value"]
        for js_dct in js
        for key, labels in js_dct.items()
        if key.endswith(label_suffix)
    )

    return vocab_list


def _load_from_url(self, vocab_id: str):
    """Load the vocabulary identified by a ``__URL__`` id into the cache.

    The ``__URL__`` prefix is expanded to ``https://`` and the download is
    delegated according to the host. The result is stored in
    ``self._vocabs`` under the original *vocab_id*.

    Args:
        vocab_id: Vocabulary id of the form ``__URL__<host>/<path>``.
    """
    vocab_id_url = vocab_id.replace("__URL__", "https://")
    # Start from an empty vocabulary so that an unrecognised host stores an
    # empty list instead of failing with UnboundLocalError below.
    vocab_list = []
    if vocab_id_url.startswith("https://raw.githubusercontent.com"):
        vocab_list = self._load_from_url_github(vocab_id_url)
    elif vocab_id_url.startswith("https://vocab.ceda.ac.uk"):
        vocab_list = self._load_from_url_esacci(vocab_id_url)
    else:
        print(f"Vocabulary url provided is not recognised: {vocab_id_url}")

    self._vocabs[vocab_id] = vocab_list
# Date/time strings made only of digits: YYYY[MM[DD[HH[MM[SS]]]]].
DATE_REGEX_GENERIC = re.compile(
    r"^\d{4}$|^\d{6}$|^\d{8}$|^\d{10}$|^\d{12}$|^\d{14}$"
)


def _spec_option(option, key, default):
    """Return ``option[key]``, or *default* when that lookup is not possible."""
    try:
        return option[key]
    except (TypeError, KeyError, IndexError):
        # option is None / not subscriptable by key, or the key is absent.
        return default


def _matches_date_format(value, fmt):
    """Return True when *value* is exactly a date/time written in *fmt*.

    ``strptime`` alone is too lenient: it accepts fields without zero
    padding, so e.g. '20231341' parses with '%Y%m%d%H%M'. Formatting the
    parsed value back must therefore reproduce the input.
    NOTE(review): ``%Y`` output for years below 1000 is platform dependent,
    so such years may be rejected by the round trip.
    """
    try:
        parsed = dt.datetime.strptime(value, fmt)
    except ValueError:
        return False
    return parsed.strftime(fmt) == value


def check_generic_file_name(
    file_name,
    vocab_checks=None,
    segregator=None,
    extension=None,
    spec_verbose=False,
    **kwargs,
):
    """Check each part of a file name against a per-position specification.

    The extension is removed from the end of *file_name*, the remainder is
    split on the segregator, and part number ``N`` (zero based) is checked
    against ``vocab_checks["fieldNN"]``. A field specification is one of:

    * ``__vocabs__:...`` or ``__URL__...``: the part must pass ``vocabs.check``;
    * ``__date__:<fmt>[,<fmt>...]``: the part must be 4-14 digits and match
      one of the ``strptime`` formats exactly;
    * ``__version__:<regex>``: the part must match the regular expression.

    Args:
        file_name: File name (without directory) to check.
        vocab_checks: Mapping of ``fieldNN`` keys to field specifications.
        segregator: Mapping with key ``seg``, the separator between parts
            (default ``'_'``).
        extension: Mapping with key ``ext``, the file extension
            (default ``'.test'``).
        spec_verbose: Mapping with key ``spec_verb``; when true, progress
            and errors are printed (default off).
        **kwargs: Ignored.

    Returns:
        Tuple ``(errors, warnings)`` of lists of messages; ``warnings`` is
        always empty.

    Raises:
        ValueError: If *file_name* is not a string.
    """
    vocab_checks = vocab_checks or {}
    seg = _spec_option(segregator, "seg", "_")
    ext = _spec_option(extension, "ext", ".test")
    spec_verb = _spec_option(spec_verbose, "spec_verb", False)

    errors = []
    warnings = []

    def report(message):
        # Record an error and echo it when verbose output is requested.
        errors.append(message)
        if spec_verb:
            print(message)

    if not isinstance(file_name, str):
        raise ValueError(
            f"file_name must be a string, not {type(file_name).__name__}"
        )

    # Strip the extension only where it is a suffix; replacing every
    # occurrence would also eat matching text inside the name itself.
    if ext and file_name.endswith(ext):
        extracted_name = file_name[: -len(ext)]
    else:
        extracted_name = file_name
    file_name_parts = extracted_name.split(seg)

    if spec_verb:
        print(f"File name: {file_name}")
        print(f"Segregator: {seg}")
        print(f"Extension: {ext}")
        print(f"All file name parts: {file_name_parts}")

    # The number of parts must equal the number of specified fields; this
    # does not depend on the individual part, so check it once up front.
    n_parts = len(file_name_parts)
    n_fields = len(vocab_checks)
    if n_parts != n_fields:
        comparison = "greater" if n_parts > n_fields else "less"
        report(
            f"[file name]: Number of file name fields ({n_parts}) is {comparison} than the {n_fields} fields expected."
        )
        return errors, warnings

    for idx, key in enumerate(file_name_parts):
        if spec_verb:
            print("")
            print(idx, key)

        field_name = f"field{idx:02}"
        field = vocab_checks.get(field_name)
        if not isinstance(field, str):
            # Gap in the fieldNN numbering (or a non-string entry): report
            # it rather than crashing with KeyError/AttributeError.
            report(
                f"[file name]: No specification found for '{field_name}' (file name part '{key}')."
            )
            continue

        if field.startswith(("__vocabs__", "__URL__")):
            # VOCAB (config or URL)
            if vocabs.check(field, key, spec_verb=spec_verb):
                report(f"[file name]: Unknown field '{key}' in vocab {field}.")

        elif field.startswith("__date__"):
            # DATE: everything after the first colon is the format list.
            fmts = field.partition(":")[2].split(",")
            if spec_verb:
                print(f"Valid date formats: {fmts}")

            if not DATE_REGEX_GENERIC.match(key):
                report(
                    f"[file name]: Expecting date/time - bad date format '{key}'"
                )
            elif any(_matches_date_format(key, fmt) for fmt in fmts):
                if spec_verb:
                    print(f"Date string {key} matches the required format")
            else:
                report(
                    f"[file name]: Invalid date/time string '{key}'. Date/time should take the form YYYY[MM[DD[HH[MM[SS]]]]], where the fields in brackets are optional."
                )

        elif field.startswith("__version__"):
            # FILE/PRODUCT VERSION: everything after the first colon is the
            # regular expression (which may itself contain colons).
            verfmt = field.partition(":")[2]
            if re.match(verfmt, key):
                if spec_verb:
                    print(f"File version {key} matches the required format")
            else:
                report(
                    f"[file name]: Invalid file version '{key}'. File versions should take the form n{{1,}}[.n{{1,}}]."
                )

        else:
            # FIELD NOT RECOGNISED
            report(f"[file name]: {field} field type not recognised.")

    return errors, warnings
SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field03: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Additional segregator (also stored in the 'product' vocabulary) + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Date and time + field06: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # File version + field07: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml new file mode 100644 index 0000000..ce8269b --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg-gds.yml @@ -0,0 +1,30 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # Date and time + field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # ESACCI + field01: __vocabs__:esa-cci-file-name-config:field00 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # CCI Project (e.g. SEAICE) + field03: __vocabs__:esa-cci-file-name-config:field01 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Additional segregator (also stored in the 'product' vocabulary) + field06: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # GDS version + field07: __version__:^v\d?\d.?\d?\d?$ + # File version + field08: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml new file mode 100644 index 0000000..606d262 --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-add-seg.yml @@ -0,0 +1,28 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # Date and time + field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # ESACCI + field01: __vocabs__:esa-cci-file-name-config:field00 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # CCI Project (e.g. SEAICE) + field03: __vocabs__:esa-cci-file-name-config:field01 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
# specs/groups/esa-cci-v1.0/esa-cci-file-name-ghrsst-gds.yml
file-name-format:
  func: checksit.generic.check_generic_file_name
  params:
    vocab_checks:
      # Keys must be numbered consecutively from field00: the checker looks
      # up the Nth part of the file name under the key "field<NN>".
      # Date and time
      field00: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S
      # ESACCI
      field01: __vocabs__:esa-cci-file-name-config:field00
      # Processing Level (e.g. L3C)
      field02: __vocabs__:esa-cci-file-name-config:field02
      # CCI Project (e.g. SEAICE)
      field03: __vocabs__:esa-cci-file-name-config:field01
      # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary
      # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json
      # Product String (e.g. NIMBUS5_ESMR-EASE2_NH)
      # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json
      # GDS version
      field06: __version__:^v\d?\d.?\d?\d?$
      # File version
      field07: __version__:^fv\d?\d.?\d?\d?$
    segregator:
      seg: '-'
    extension:
      ext: '.nc'
    spec_verbose:
      spec_verb: True
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field05: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # File version + field06: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: True diff --git a/specs/groups/esa-cci-v1.0/esa-cci-file-name.yml b/specs/groups/esa-cci-v1.0/esa-cci-file-name.yml new file mode 100644 index 0000000..8028be8 --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-file-name.yml @@ -0,0 +1,26 @@ +file-name-format: + func: checksit.generic.check_generic_file_name + params: + vocab_checks: + # ESACCI + field00: __vocabs__:esa-cci-file-name-config:field00 + # CCI Project (e.g. SEAICE) + field01: __vocabs__:esa-cci-file-name-config:field01 + # Processing Level (e.g. L3C) + field02: __vocabs__:esa-cci-file-name-config:field02 + # Data Type (e.g. SICONC): this is the 'alternative label' of those in the vocabulary + # https://vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + field03: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json + # Product String (e.g. 
NIMBUS5_ESMR-EASE2_NH) + # http://vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + field04: __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json + # Date and time + field05: __date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S + # File version + field06: __version__:^fv\d?\d.?\d?\d?$ + segregator: + seg: '-' + extension: + ext: '.nc' + spec_verbose: + spec_verb: False diff --git a/specs/groups/esa-cci-v1.0/esa-cci-global-attrs.yml b/specs/groups/esa-cci-v1.0/esa-cci-global-attrs.yml new file mode 100644 index 0000000..dd51bf7 --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-global-attrs.yml @@ -0,0 +1,45 @@ +required-global-attrs0: + func: checksit.generic.check_global_attrs + params: + defined_attrs: + - title + - institution + - source + - history + - references + - tracking_id + - Conventions + - product_version + - format_version + - summary + - keywords + - id + - naming_authority + - keywords_vocabulary + - cdm_data_type + - comment + - date_created + - creator_name + - creator_url + - creator_email + - project + - geospatial_lat_min + - geospatial_lat_max + - geospatial_lon_min + - geospatial_lon_max + - geospatial_vertical_min + - geospatial_vertical_max + - time_coverage_start + - time_coverage_end + - time_coverage_duration + - time_coverage_resolution + - standard_name_vocabulary + - license + - platform + - sensor + - spatial_resolution + - key_variables + vocab_attrs: + Conventions: __vocabs__:esa-cci-global-attrs-config:Conventions + project: __vocabs__:esa-cci-global-attrs-config:project + license: __vocabs__:esa-cci-global-attrs-config:license diff --git a/specs/groups/esa-cci-v1.0/esa-cci-variable-attrs.yml b/specs/groups/esa-cci-v1.0/esa-cci-variable-attrs.yml new file mode 100644 index 0000000..f5910f3 --- /dev/null +++ b/specs/groups/esa-cci-v1.0/esa-cci-variable-attrs.yml @@ -0,0 +1,17 @@ +var-requires1: + func: checksit.generic.check_var + params: + variable: + - latitude + defined_attrs: + - units:degree_north + - 
def test_check_generic_file_name():
    """Exercise check_generic_file_name with ESA CCI style file names.

    NOTE(review): the ``__URL__`` fields are resolved through the vocabulary
    loader, which downloads them with ``requests``; presumably this test
    needs network access to vocab.ceda.ac.uk (confirm before running offline).
    """
    segregator = {"seg": "-"}
    extension = {"ext": ".nc"}

    product_vocab = "__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json"
    standard_fields = [
        "__vocabs__:esa-cci-file-name-config:field00",
        "__vocabs__:esa-cci-file-name-config:field01",
        "__vocabs__:esa-cci-file-name-config:field02",
        "__URL__vocab.ceda.ac.uk/scheme/cci/cci-content/dataType.json",
        product_vocab,
        "__date__:%Y,%Y%m,%Y%m%d,%Y%m%d%H,%Y%m%d%H%M,%Y%m%d%H%M%S",
        "__version__:^fv\\d?\\d.?\\d?\\d?$",
    ]
    # The "additional segregator" layout has a second product-vocabulary
    # field directly after the product string.
    add_seg_fields = standard_fields[:5] + [product_vocab] + standard_fields[5:]

    def as_checks(fields):
        # Number the specifications field00, field01, ... in order.
        return {f"field{idx:02}": spec for idx, spec in enumerate(fields)}

    cases = [
        # Standard ESA CCI file name: legitimate, should pass without error
        (
            standard_fields,
            "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.1.nc",
            [],
        ),
        # Incorrect field00
        (
            standard_fields,
            "ESAC3S-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.1.nc",
            ["[file name]: Unknown field 'ESAC3S' in vocab __vocabs__:esa-cci-file-name-config:field00."],
        ),
        # Too many fields
        (
            standard_fields,
            "ESACCI-SOILMOISTURE-L3S-SSMV-SSS-COMBINED-20231231000000-fv09.1.nc",
            ["[file name]: Number of file name fields (8) is greater than the 7 fields expected."],
        ),
        # Incorrect date (day 41)
        (
            standard_fields,
            "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231241000000-fv09.1.nc",
            ["[file name]: Invalid date/time string '20231241000000'. Date/time should take the form YYYY[MM[DD[HH[MM[SS]]]]], where the fields in brackets are optional."],
        ),
        # Incorrect version format
        (
            standard_fields,
            "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-20231231000000-fv09.2.1.nc",
            ["[file name]: Invalid file version 'fv09.2.1'. File versions should take the form n{1,}[.n{1,}]."],
        ),
        # Additional segregator layout: the extra segregator value is not in
        # the product vocabulary, so it is reported as an unknown field
        (
            add_seg_fields,
            "ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-TEST_ADD_SEG-20231231000000-fv09.1.nc",
            ["[file name]: Unknown field 'TEST_ADD_SEG' in vocab __URL__vocab.ceda.ac.uk/scheme/cci/cci-content/product.json."],
        ),
    ]

    for fields, file_name, expected_errors in cases:
        errors, warnings = cg.check_generic_file_name(
            file_name, as_checks(fields), segregator, extension
        )
        assert errors == expected_errors, file_name
        assert warnings == [], file_name