Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.3.0 Full Release #49

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
19 changes: 3 additions & 16 deletions padocc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,7 @@
__contact__ = "[email protected]"
__copyright__ = "Copyright 2024 United Kingdom Research and Innovation"

from padocc.phases import (
ScanOperation,
KerchunkDS,
ZarrDS,
cfa_handler,
ValidateOperation
)
from .core import ProjectOperation
from .groups import GroupOperation

# Lookup table from a pipeline phase name to the object that implements it.
# The 'compute' entry is itself a table, keyed by the requested output format.
phase_map = {
'scan': ScanOperation,
'compute': {
'kerchunk': KerchunkDS,
'zarr': ZarrDS,
'CFA': cfa_handler,
},
'validate': ValidateOperation
}
from .phases import phase_map
13 changes: 8 additions & 5 deletions padocc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@

import argparse

from padocc.core.utils import BypassSwitch
from padocc.operations import GroupOperation
from padocc import phase_map
from padocc.core.utils import BypassSwitch, get_attribute
from padocc import GroupOperation, phase_map

def get_args():
parser = argparse.ArgumentParser(description='Run a pipeline step for a group of datasets')
Expand All @@ -26,8 +25,9 @@ def get_args():

# Single-job within group
parser.add_argument('-G','--groupID', dest='groupID', default=None, help='Group identifier label')
parser.add_argument('-s','--subset', dest='subset', default=1, type=int, help='Size of subset within group')
parser.add_argument('-s','--subset', dest='subset', default=None, type=int, help='Size of subset within group')
parser.add_argument('-r','--repeat_id', dest='repeat_id', default='main', help='Repeat id (main if first time running, <phase>_<repeat> otherwise)')
parser.add_argument('-p','--proj_code',dest='proj_code',help='Run for a specific project code, within a group or otherwise')

# Specialised
parser.add_argument('-C','--cloud-format', dest='mode', default='kerchunk', help='Output format required.')
Expand All @@ -43,6 +43,9 @@ def get_args():
parser.add_argument('--allow-band-increase', dest='band_increase',action='store_true', help='Allow automatic banding increase relative to previous runs.')

args = parser.parse_args()

args.workdir = get_attribute('WORKDIR', args, 'workdir')

return args

def main():
Expand All @@ -65,7 +68,7 @@ def main():
)

if args.phase == 'init':
group.init_from_file(args.input_file)
group.init_from_file(args.input)
return

group.run(
Expand Down
223 changes: 11 additions & 212 deletions padocc/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def get_status(tb: list) -> str:
else:
raise err


class KerchunkException(Exception):
def __init__(self, proj_code: Union[str,None], groupdir: Union[str,None]) -> None:
self.proj_code = proj_code
Expand All @@ -65,7 +64,7 @@ def __init__(self, proj_code: Union[str,None], groupdir: Union[str,None]) -> Non
msg = getattr(self,'message')
super().__init__(msg)

class PartialDriverError(KerchunkException):
class PartialDriverError(KerchunkException): # Keep
"""All drivers failed (NetCDF3/Hdf5/Tiff) for one or more files within the list"""
def __init__(
self,
Expand All @@ -81,39 +80,8 @@ def __init__(
def get_str(self):
return 'PartialDriverError'

class NaNComparisonError(KerchunkException):
    """Raised when the NaN values of two compared objects are found to differ."""

    def __init__(
            self,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Set the fixed error message and initialise the base exception.

        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = f"NaN values do not match between comparison objects"
        super().__init__(proj_code, groupdir)

        quiet = verbose < 1
        if quiet:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'NaNComparisonError'

class RemoteProtocolError(KerchunkException):
    """
    Conversion failure associated with one or more files within the list.

    NOTE(review): presumably specific to remote-protocol access (inferred from
    the class name only). The message text is identical to the one used by
    PartialDriverError - confirm whether a dedicated wording is wanted.
    """

    def __init__(
            self,
            filenums: Union[int, None] = None,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Build the error message and initialise the base exception.

        :param filenums:    File number(s) interpolated into the message.
        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = f"All drivers failed when performing conversion for files {filenums}"
        super().__init__(proj_code, groupdir)
        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            self.__class__.__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        # Previously returned 'PartialDriverError' (copy-paste slip), which
        # made this error indistinguishable from PartialDriverError.
        return 'RemoteProtocolError'

class KerchunkDriverFatalError(KerchunkException):
class KerchunkDriverFatalError(KerchunkException): # Keep
"""All drivers failed (NetCDF3/Hdf5/Tiff) - run without driver bypass to assess the issue with each driver type."""
def __init__(
self,
Expand All @@ -128,55 +96,7 @@ def __init__(
def get_str(self):
return 'KerchunkDriverFatalError'

class IdenticalVariablesError(KerchunkException):
    """Every variable is identical enough across files that nothing can be stacked or concatenated."""

    def __init__(
            self,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Set the fixed error message and initialise the base exception.

        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        text = "All variables are identical across files"
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = text
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'IdenticalVariablesError'

class XKShapeToleranceError(KerchunkException):
    """Validation with a shape tolerance on concat-dims failed: the shape difference is larger than the tolerance allows."""

    def __init__(
            self,
            tolerance: int = 0,
            diff: int = 0,
            dim: str = '',
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Build the error message and initialise the base exception.

        :param tolerance:   Allowed tolerance, interpolated into the message.
        :param diff:        Observed shape difference, interpolated into the message.
        :param dim:         Dimension label, interpolated into the message.
        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        summary = f"Shape difference ({diff}) exceeds allowed tolerance ({tolerance}) for dimension ({dim})"
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = summary
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'XKShapeToleranceError'

class BlacklistProjectCode(KerchunkException):
    """The requested project code appears on the list of project codes to ignore."""

    def __init__(
            self,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Set the fixed error message and initialise the base exception.

        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = 'Project Code listed in blacklist for bad data - will not be processed.'
        super().__init__(proj_code, groupdir)

        hide_module = verbose < 1
        if hide_module:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'BlacklistProjectCode'

class MissingVariableError(KerchunkException):
class MissingVariableError(KerchunkException): # Keep
"""A variable is missing from the environment or set of arguments."""
def __init__(
self,
Expand Down Expand Up @@ -226,38 +146,7 @@ def __init__(
def get_str(self):
return 'ExpectTimeoutError'

class ProjectCodeError(KerchunkException):
    """The correct project code could not be found in the list of project codes for this run."""

    def __init__(
            self,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Set the fixed error message and initialise the base exception.

        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = f'Project Code Extraction Failed'
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'ProjectCodeError'

class FilecapExceededError(KerchunkException):
    """While scanning, no suitable files could be found within the specified set of files."""

    def __init__(
            self,
            nfiles: int = 0,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Build the error message and initialise the base exception.

        :param nfiles:      Number of files attempted, interpolated into the message.
        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        report = f'Filecap exceeded: {nfiles} files attempted'
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = report
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'FilecapExceededError'

class ChunkDataError(KerchunkException):
class ChunkDataError(KerchunkException): # Keep
"""Overflow Error from pandas during decoding of chunk information, most likely caused by bad data retrieval."""
def __init__(
self,
Expand Down Expand Up @@ -288,64 +177,6 @@ def __init__(
def get_str(self):
return 'NoValidTimeSlicesError'

class VariableMismatchError(KerchunkException):
    """Testing found variables in the NetCDF file that are absent from Kerchunk."""

    def __init__(
            self,
            missing: Union[dict, None] = None,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Build the error message and initialise the base exception.

        :param missing:     Missing variables, interpolated into the message;
                            any falsy value is shown as an empty dict.
        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        if not missing:
            missing = {}

        # ``message`` has to exist before the base initialiser is invoked.
        self.message = f'Missing variables {missing} in Kerchunk file'
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'VariableMismatchError'

class ShapeMismatchError(KerchunkException):
    """ND array shapes differ between the Kerchunk and Xarray objects - when a subset of the Netcdf files is used."""

    def __init__(
            self,
            var: Union[dict, None] = None,
            first: Union[dict, None] = None,
            second: Union[dict, None] = None,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Build the error message and initialise the base exception.

        Any falsy ``var``, ``first`` or ``second`` is shown as an empty dict.

        :param var:         Variable reported in the message.
        :param first:       Shape reported under the ``K`` label.
        :param second:      Shape reported under the ``X`` label.
        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        if not var:
            var = {}
        if not first:
            first = {}
        if not second:
            second = {}

        # ``message`` has to exist before the base initialiser is invoked.
        self.message = f'Kerchunk/NetCDF mismatch for variable {var} with shapes - K {first} vs X {second}'
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'ShapeMismatchError'

class TrueShapeValidationError(KerchunkException):
    """ND array shapes differ between the Kerchunk and Xarray objects - when the complete set of files is used."""

    def __init__(
            self,
            message: str = 'kerchunk',
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Build the error message and initialise the base exception.

        :param message:     Prefix placed at the start of the stored message.
        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        full_text = f'{message} mismatch with shapes using full dataset - check logs'
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = full_text
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'TrueShapeValidationError'

class NoOverwriteError(KerchunkException):
"""Output file already exists and the process does not have forceful overwrite (-f) set."""
def __init__(
Expand All @@ -362,7 +193,7 @@ def __init__(
def get_str(self):
return 'NoOverwriteError'

class MissingKerchunkError(KerchunkException):
class MissingKerchunkError(KerchunkException): # Keep
"""Kerchunk file not found."""
def __init__(
self,
Expand All @@ -385,14 +216,14 @@ def __init__(
proj_code: Union[str,None] = None,
groupdir: Union[str,None] = None
) -> None:
self.message = "Fatal Validation Error"
self.message = "Fatal Validation Error - see data report."
super().__init__(proj_code, groupdir)
if verbose < 1:
self.__class__.__module__ = 'builtins'
def get_str(self):
return 'ValidationError'

class ComputeError(KerchunkException):
class ComputeError(KerchunkException): # Keep
"""Compute stage failed - likely due to invalid config/use of the classes"""
def __init__(
self,
Expand All @@ -406,22 +237,6 @@ def __init__(
self.__class__.__module__ = 'builtins'
def get_str(self):
return 'ComputeError'

class SoftfailBypassError(KerchunkException):
    """Validation was left incomplete: some arrays held only NaN values, which cannot be compared."""

    def __init__(
            self,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Set the fixed error message and initialise the base exception.

        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = "Kerchunk validation failed softly with no bypass - rerun with bypass flag"
        super().__init__(proj_code, groupdir)

        suppress_module = verbose < 1
        if suppress_module:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'SoftfailBypassError'

class ConcatenationError(KerchunkException):
"""Variables could not be concatenated over time and are not duplicates - no known solution"""
Expand All @@ -439,7 +254,7 @@ def __init__(
def get_str(self):
return 'ConcatenationError'

class ConcatFatalError(KerchunkException):
class ConcatFatalError(KerchunkException): # Keep
"""Chunk sizes differ between refs - files cannot be concatenated"""
def __init__(
self,
Expand All @@ -458,7 +273,7 @@ def __init__(
def get_str(self):
return 'ConcatFatalError'

class SourceNotFoundError(KerchunkException):
class SourceNotFoundError(KerchunkException): # Keep
"""Source File could not be located."""
def __init__(
self,
Expand Down Expand Up @@ -492,7 +307,7 @@ def __init__(
def get_str(self):
return 'ArchiveConnectError'

class KerchunkDecodeError(KerchunkException):
class KerchunkDecodeError(KerchunkException): # Keep
"""Decoding of Kerchunk file failed - likely a time array issue."""
def __init__(
self,
Expand All @@ -506,20 +321,4 @@ def __init__(
if verbose < 1:
self.__class__.__module__ = 'builtins'
def get_str(self):
return 'KerchunkDecodeError'

class FullsetRequiredError(KerchunkException):
    """Validation of this project requires the full set of files."""

    def __init__(
            self,
            verbose: int = 0,
            proj_code: Union[str, None] = None,
            groupdir: Union[str, None] = None
        ) -> None:
        """
        Set the fixed error message and initialise the base exception.

        :param verbose:     Verbosity level; below 1 the class is reported
                            as if it belonged to ``builtins``.
        :param proj_code:   Project code forwarded to the base exception.
        :param groupdir:    Group directory forwarded to the base exception.
        """
        # ``message`` has to exist before the base initialiser is invoked.
        self.message = f"This project must be validated by opening the full set of files."
        super().__init__(proj_code, groupdir)

        if verbose < 1:
            # Class-level rebinding: tracebacks then print the bare class name.
            type(self).__module__ = 'builtins'

    def get_str(self):
        """Return the name of this error as a plain string."""
        return 'FullsetRequiredError'
return 'KerchunkDecodeError'
Loading
Loading