From 73fe8ac3187ce70e85356fffb30ddfd35ad857e7 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 1 Dec 2023 12:52:28 +0000 Subject: [PATCH] Auto-find specs for NCAS-IMAGE files --- checksit/check.py | 43 +++++++++++++++++++++++++++++++++-------- checksit/readers/cdl.py | 2 +- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/checksit/check.py b/checksit/check.py index 32473226..af5b3f80 100644 --- a/checksit/check.py +++ b/checksit/check.py @@ -216,12 +216,16 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None # tmpl = self.parse_file_header(template, auto_cache=auto_cache, verbose=verbose) - ### Check for AMOF netCDF file and gather specs ### - if template == "auto" and file_path.split('.')[-1] == 'nc': - # Look for AMOF Convention string in Conventions global attr, if it exists - if ':Conventions' in file_content.cdl: - conventions = file_content.cdl.split(':Conventions =')[1].split(';')[0].strip() - if "NCAS-AMOF" in conventions or "NCAS-GENERAL" in conventions or "NCAS-AMF" in conventions: + ### Check for NCAS data files and gather specs ### + # if template and specs are "default" values, check to see if + # file is an ncas file (assuming file name starts with instrument name) + if (template == "auto" and specs == None and + file_path.split("/")[-1].startswith("ncas-")): + # find appropriate specs depending on convention + if file_path.split(".")[-1] == "nc" and ":Conventions" in file_content.cdl: + conventions = file_content.cdl.split(":Conventions =")[1].split(";")[0].strip() + # NCAS-GENERAL file + if any(name in conventions for name in ["NCAS-GENERAL", "NCAS-AMF", "NCAS-AMOF"]): if verbose: print("\nNCAS-AMOF file detected, finding correct spec files") print("Finding correct AMOF version...") @@ -232,7 +236,7 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None # check specs exist for that version specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}") if not os.path.exists(specs_dir): - if verbose: print(f"Specs for version {version_number} not found, attempting download...") + if verbose: print(f"Specs for version NCAS-GENERAL-{version_number} not found, attempting download...") try: vocabs_dir = os.path.join(conf["settings"].get("vocabs_dir", "./checksit/vocabs"), f"AMF_CVs/{version_number}") cvs = urllib.request.urlopen(f"https://github.com/ncasuk/AMF_CVs/tree/v{version_number}/AMF_CVs") @@ -261,7 +265,6 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None sys.exit() except: raise - # get deployment mode and data product, to then get specs deployment_mode = file_content.cdl.split(':deployment_mode =')[1].split(';')[0].strip().strip('"') @@ -272,6 +275,30 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None # don't need to do template check template = "off" + # NCAS-RADAR (coming soon...) + # if "NCAS-Radar" in conventions + + elif (file_path.split(".")[-1] in ["png", "PNG", "jpg", "JPG", "jpeg", "JPEG"] and + "XMP-photoshop:Instructions" in file_content.global_attrs.keys()): + conventions = file_content.global_attrs["XMP-photoshop:Instructions"] + if "National Centre for Atmospheric Science Image Metadata Standard" in file_content.global_attrs["XMP-photoshop:Instructions"].replace("\n"," "): + if verbose: + print("\nNCAS-IMAGE file detected, finding correct spec files") + print("Finding correct IMAGE version...") + version_number = conventions.replace("\n"," ").split("Metadata Standard ")[1].split(":")[0] + spec_folder = f"ncas-image-{version_number}" + if verbose: print(f" {version_number}") + specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}") + if not os.path.exists(specs_dir): + print(f"[ERROR] specs for NCAS-IMAGE {version_number} can not be found.") + print("Aborting...") + sys.exit() + product = file_path.split('/')[-1].split('_')[3] + product_spec = f"{spec_folder}/amof-{product}" + specs = [product_spec, f"{spec_folder}/amof-image-global-attrs"] + template = "off" + + if template == "off": tmpl = template diff --git a/checksit/readers/cdl.py b/checksit/readers/cdl.py index 5ee68afe..a040e409 100644 --- a/checksit/readers/cdl.py +++ b/checksit/readers/cdl.py @@ -78,7 +78,7 @@ def _get_sections(self, lines, split_patterns, start_at): if split_patterns: splitter = split_patterns.popleft() else: - line_no_comments = re.split(";\s+//.*$", line)[0].strip().rstrip(";").strip() + line_no_comments = re.split(r";\s+//.*$", line)[0].strip().rstrip(";").strip() if not line_no_comments.startswith("//"): current.append(line_no_comments)