From 755c5d98b850b3bece0f30db05d7c3ec10cf43eb Mon Sep 17 00:00:00 2001 From: Ciheim Brown Date: Sun, 28 Apr 2024 20:35:05 -0400 Subject: [PATCH] #115 #117 Catching errors and Ignoring Files --- intakebuilder/getinfo.py | 10 +++++++--- intakebuilder/gfdlcrawler.py | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/intakebuilder/getinfo.py b/intakebuilder/getinfo.py index 2d170e3..1af838c 100644 --- a/intakebuilder/getinfo.py +++ b/intakebuilder/getinfo.py @@ -84,7 +84,7 @@ def getInfoFromFilename(filename,dictInfo,logger): #adding this back to trace back some old errors def getInfoFromGFDLFilename(filename,dictInfo,logger): # 5 AR: get the following from the netCDF filename e.g. atmos.200501-200912.t_ref.nc - if(filename.endswith(".nc")): + if(filename.endswith(".nc") and not filename.startswith(".")): ncfilename = filename.split(".") varname = ncfilename[-2] dictInfo["variable_id"] = varname @@ -131,8 +131,12 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo): for i in range(nlen-1,0,-1): try: if(builderconfig.output_path_template[i] != "NA"): - dictInfo[builderconfig.output_path_template[i]] = stemdir[(j)] - except: + try: + dictInfo[builderconfig.output_path_template[i]] = stemdir[(j)] + except IndexError: + print("Check configuration. Is output path template set correctly?") + exit() + except IndexError: sys.exit("oops in getInfoFromGFDLDRS"+str(i)+str(j)+builderconfig.output_path_template[i]+stemdir[j]) j = j - 1 cnt = cnt + 1 diff --git a/intakebuilder/gfdlcrawler.py b/intakebuilder/gfdlcrawler.py index abc3f19..5d38d52 100644 --- a/intakebuilder/gfdlcrawler.py +++ b/intakebuilder/gfdlcrawler.py @@ -19,6 +19,7 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger): pat = re.compile('({}/{}/{}/{})'.format(dictFilter["modeling_realm"],"ts",dictFilter["frequency"],dictFilter["chunk_freq"])) orig_pat = pat + #TODO INCLUDE filter in traversing through directories at the top for dirpath, dirs, files in os.walk(projectdir): searchpath = dirpath @@ -27,14 +28,17 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger): if(pat is not None): m = re.search(pat, searchpath) for filename in files: + if filename.startswith("."): + logger.debug("Skipping hidden file", filepath) + continue + if not filename.endswith(".nc"): + logger.debug("FILE does not end with .nc. Skipping", filepath) + continue logger.info(dirpath+"/"+filename) dictInfo = {} dictInfo = getinfo.getProject(projectdir, dictInfo) # get info from filename filepath = os.path.join(dirpath,filename) # 1 AR: Bugfix: this needs to join dirpath and filename to get the full path to the file - if not filename.endswith(".nc"): - logger.debug("FILE does not end with .nc. Skipping", filepath) - continue dictInfo["path"]=filepath dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger) dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo)