Skip to content

Commit

Permalink
Merge pull request #1768 from girder/harden-readers
Browse files (browse the repository at this point in the history)
Harden reading a variety of test files
  • Loading branch information
manthey authored Jan 9, 2025
2 parents e556863 + b6b82c6 commit e31d221
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- Improve writing to zarr sinks from multiple processes ([#1713](../../pull/1713))
- Slightly faster GDAL validateCOG ([#1761](../../pull/1761))
- Improve clearing caches ([#1766](../../pull/1766))
- Harden many of the source reads ([#1768](../../pull/1768))

### Changes

Expand Down
5 changes: 4 additions & 1 deletion large_image/tilesource/geo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import pathlib
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
from urllib.parse import urlencode, urlparse
Expand Down Expand Up @@ -186,7 +187,7 @@ def _setDefaultStyle(self) -> None:
self._bandNames = {}
for idx, band in self.getBandInformation().items():
if band.get('interpretation'):
self._bandNames[band['interpretation'].lower()] = idx
self._bandNames[str(band['interpretation']).lower()] = idx
if isinstance(getattr(self, '_style', None), dict) and (
not self._style or 'icc' in self._style and len(self._style) == 1):
return
Expand Down Expand Up @@ -277,6 +278,8 @@ def getNativeMagnification(self) -> Dict[str, Optional[float]]:
:return: width of a pixel in mm, height of a pixel in mm.
"""
scale = self.getPixelSizeInMeters()
if scale and not math.isfinite(scale):
scale = None
return {
'magnification': None,
'mm_x': scale * 100 if scale else None,
Expand Down
2 changes: 1 addition & 1 deletion sources/bioformats/large_image_source_bioformats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@


# Default to ignoring files with no extension and some specific extensions.
config.ConfigValues['source_bioformats_ignored_names'] = r'(^[^.]*|\.(jpg|jpeg|jpe|png|tif|tiff|ndpi|nd2|ome|nc|json|geojson|isyntax|mrxs|zip|zarr(\.db|\.zip)))$' # noqa
config.ConfigValues['source_bioformats_ignored_names'] = r'(^[^.]*|\.(jpg|jpeg|jpe|png|tif|tiff|ndpi|nd2|ome|nc|json|geojson|fits|isyntax|mrxs|zip|zarr(\.db|\.zip)))$' # noqa


def _monitor_thread():
Expand Down
10 changes: 7 additions & 3 deletions sources/gdal/large_image_source_gdal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ class GDALFileTileSource(GDALBaseFileTileSource, metaclass=LruCacheMetaclass):
cacheName = 'tilesource'
name = 'gdal'

VECTOR_IMAGE_SIZE = 256 * 1024
VECTOR_IMAGE_SIZE = 256 * 1024 # for vector files without projections
PROJECTED_VECTOR_IMAGE_SIZE = 32 * 1024 # if the file has a projection

def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): # noqa
"""
Expand Down Expand Up @@ -155,7 +156,7 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): # noqa
is_netcdf = self._checkNetCDF()
try:
scale = self.getPixelSizeInMeters()
except RuntimeError as exc:
except (RuntimeError, ZeroDivisionError) as exc:
raise TileSourceError('File cannot be opened via GDAL: %r' % exc)
if not self.sizeX or not self.sizeY:
msg = 'File cannot be opened via GDAL (no size)'
Expand Down Expand Up @@ -192,7 +193,8 @@ def _openVectorSource(self, vec):
except Exception:
proj = None
# Define raster parameters
pixel_size = max(x_max - x_min, y_max - y_min) / self.VECTOR_IMAGE_SIZE
pixel_size = max(x_max - x_min, y_max - y_min) / (
self.VECTOR_IMAGE_SIZE if proj is None else self.PROJECTED_VECTOR_IMAGE_SIZE)
if not pixel_size:
msg = 'Cannot determine dimensions'
raise RuntimeError(msg)
Expand All @@ -212,6 +214,8 @@ def _openVectorSource(self, vec):
ds.SetGeoTransform((x_min, pixel_size, 0, y_min, 0, pixel_size))
if proj:
ds.SetProjection(proj)
msg = f'Rasterizing a vector layer to {x_res} x {y_res}'
self.logger.info(msg)
gdal.RasterizeLayer(ds, [1], layer, burn_values=[255])
if not hasattr(self.__class__, '_openVectorLock'):
self.__class__._openVectorLock = threading.RLock()
Expand Down
9 changes: 9 additions & 0 deletions sources/mapnik/large_image_source_mapnik/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs):
projection = projection.lower()
super().__init__(
path, projection=projection, unitsPerPixel=unitsPerPixel, **kwargs)
if self.dataset.GetDriver().ShortName in {'MBTiles', 'Rasterlite', 'SQLite'}:
msg = 'File will not be opened via mapbox'
raise TileSourceError(msg)
self.logger.debug('mapnik source using the GDAL %s driver',
self.dataset.GetDriver().ShortName)

# Unconditionally refuse vector datasets: this method always raises
# TileSourceError and never uses ``ds``.
# Presumably this overrides the GDAL source's _openVectorSource so that
# vector files are left to other tile sources -- confirm against the base
# class.
# NOTE(review): the driver check in __init__ above reports "via mapbox"
# while this message says "via mapnik"; the former looks like a typo --
# confirm.
def _openVectorSource(self, ds):
msg = 'File will not be opened via mapnik'
raise TileSourceError(msg)

def _checkNetCDF(self):
"""
Expand Down
8 changes: 6 additions & 2 deletions sources/pil/large_image_source_pil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def __init__(self, path, maxSize=None, **kwargs): # noqa
if self._pilImage is None:
try:
self._pilImage = PIL.Image.open(largeImagePath)
except (OSError, ValueError):
except (OSError, ValueError, NotImplementedError):
if not os.path.isfile(largeImagePath):
raise TileSourceFileNotFoundError(largeImagePath) from None
msg = 'File cannot be opened via PIL.'
Expand Down Expand Up @@ -178,7 +178,11 @@ def __init__(self, path, maxSize=None, **kwargs): # noqa
except Exception:
msg = 'PIL cannot find loader for this file.'
raise TileSourceError(msg)
maxval = 256 ** math.ceil(math.log(float(np.max(imgdata)) + 1, 256)) - 1
try:
maxval = 256 ** math.ceil(math.log(float(np.max(imgdata)) + 1, 256)) - 1
except Exception:
msg = 'PIL cannot load this file.'
raise TileSourceError(msg)
self._factor = 255.0 / max(maxval, 1)
self._pilImage = PIL.Image.fromarray(np.uint8(np.multiply(
imgdata, self._factor)))
Expand Down
6 changes: 5 additions & 1 deletion sources/rasterio/large_image_source_rasterio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,11 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
width=self.tileWidth,
add_alpha=add_alpha,
) as vrt:
tile = vrt.read(resampling=rio.enums.Resampling.nearest)
try:
tile = vrt.read(resampling=rio.enums.Resampling.nearest)
except Exception:
self.logger.exception('Failed to getTile')
tile = np.zeros((1, 1))

# necessary for multispectral images:
# set the coordinates first and the bands at the end
Expand Down
28 changes: 23 additions & 5 deletions test/lisource_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,16 @@ def source_compare(sourcePath, opts): # noqa
'_geospatial_source', None):
continue
result = results['styles'][-1]['sources'][source] = {}
sys.stdout.write('%s' % (source + ' ' * (slen - len(source))))
sys.stdout.flush()
large_image.cache_util.cachesClear()
try:
t = time.time()
ts = large_image.tilesource.AvailableTileSources[source](sourcePath, **kwargs)
opentime = time.time() - t
except Exception as exp:
if opts.can_read and projection and None in projections:
continue
sys.stdout.write('%s' % (source + ' ' * (slen - len(source))))
sys.stdout.flush()
result['exception'] = str(exp)
result['error'] = 'open'
sexp = str(exp).replace('\n', ' ').replace(' ', ' ').strip()
Expand All @@ -224,6 +226,8 @@ def source_compare(sourcePath, opts): # noqa
sys.stdout.write('%s %s\n' % (' ' * slen, sexp[78 - slen: 2 * (78 - slen)]))
sys.stdout.flush()
continue
sys.stdout.write('%s' % (source + ' ' * (slen - len(source))))
sys.stdout.flush()
sizeX, sizeY = ts.sizeX, ts.sizeY
result['sizeX'], result['sizeY'] = ts.sizeX, ts.sizeY
try:
Expand Down Expand Up @@ -304,7 +308,13 @@ def source_compare(sourcePath, opts): # noqa
sys.stdout.flush()
write_thumb(img[0], source, thumbs, 'thumbnail', opts, styleidx, projidx)
t = time.time()
img = ts.getTile(tx0, ty0, tz0, sparseFallback=True)
try:
img = ts.getTile(tx0, ty0, tz0, sparseFallback=True)
except Exception as exp:
result['exception'] = str(exp)
result['error'] = 'gettile'
sys.stdout.write(' fail\n')
continue
tile0time = time.time() - t
result['tile0time'] = tile0time
sys.stdout.write(' %8.3fs' % tile0time)
Expand Down Expand Up @@ -395,11 +405,19 @@ def source_compare(sourcePath, opts): # noqa
onlyMinMax=True, output=dict(maxWidth=2048, maxHeight=2048),
resample=0, **kwargs)
if 'max' not in h:
result['error'] = 'max'
sys.stdout.write(' fail\n')
sys.stdout.flush()
continue
try:
maxval = max(h['max'].tolist())
maxval = 2 ** (int(math.log(maxval or 1) / math.log(2)) + 1) if maxval > 1 else 1
except (TypeError, OverflowError) as exp:
result['exception'] = str(exp)
result['error'] = 'maxval'
sys.stdout.write(' fail\n')
sys.stdout.flush()
continue
maxval = max(h['max'].tolist())
maxval = 2 ** (int(math.log(maxval or 1) / math.log(2)) + 1) if maxval > 1 else 1
# thumbnail histogram
h = ts.histogram(bins=9, output=dict(maxWidth=256, maxHeight=256),
range=[0, maxval], resample=0, **kwargs)
Expand Down

0 comments on commit e31d221

Please sign in to comment.