Skip to content

Commit

Permalink
Merge pull request #1750 from girder/improve-ome-tiff
Browse files Browse the repository at this point in the history
Improve handling of ome-tiff files generated by bioformats
  • Loading branch information
manthey authored Jan 2, 2025
2 parents ed42a2f + 3761cad commit 96c5cea
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Harden the geojson annotation parser ([#1743](../../pull/1743))
- Add more color palettes ([#1746](../../pull/1746))
- Improve the list of extensions the bioformats source reports ([#1748](../../pull/1748))
- Improve handling of ome-tiff files generated by bioformats ([#1750](../../pull/1750))

### Changes

Expand Down
70 changes: 69 additions & 1 deletion sources/ometiff/large_image_source_ometiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def __init__(self, path, **kwargs):
msg = 'Not a recognized OME Tiff'
raise TileSourceError(msg)
info = getattr(base, '_description_record', None)
self._associatedImages = {}
if not info or not info.get('OME'):
msg = 'Not an OME Tiff'
raise TileSourceError(msg)
Expand All @@ -115,6 +116,7 @@ def __init__(self, path, **kwargs):
except KeyError:
msg = 'Not a recognized OME Tiff'
raise TileSourceError(msg)
usesSubIfds = self._checkForSubIfds(base)
omeimages = [
entry['Pixels'] for entry in self._omeinfo['Image'] if
len(entry['Pixels']['TiffData']) == len(self._omebase['TiffData'])]
Expand All @@ -125,10 +127,16 @@ def __init__(self, path, **kwargs):
omebylevel = dict(zip(levels, omeimages))
self._omeLevels = [omebylevel.get(key) for key in range(max(omebylevel.keys()) + 1)]
if base._tiffInfo.get('istiled'):
if usesSubIfds:
self._omeLevels = [None] * max(usesSubIfds) + [self._omeLevels[-1]]
self._tiffDirectories = [
self.getTiffDir(int(entry['TiffData'][0].get('IFD', 0)))
if entry else None
for entry in self._omeLevels]
if usesSubIfds:
for lvl in usesSubIfds:
if self._tiffDirectories[lvl] is None:
self._tiffDirectories[lvl] = False
else:
self._tiffDirectories = [
self.getTiffDir(0, mustBeTiled=None)
Expand All @@ -149,7 +157,6 @@ def __init__(self, path, **kwargs):
# We can get the embedded images, but we don't currently use non-tiled
# images as associated images. This would require enumerating tiff
# directories not mentioned by the ome list.
self._associatedImages = {}
self._checkForInefficientDirectories()

def _checkForOMEZLoop(self):
Expand Down Expand Up @@ -199,6 +206,40 @@ def _checkForOMEZLoop(self):
info['Image']['Pixels']['PlanesFromZloop'] = 'true'
info['Image']['Pixels']['SizeZ'] = str(zloop)

def _checkForSubIfds(self, base):
    """
    Check if the first ifd has sub-ifds.  If so, expect lower resolutions
    to be in subifds, not in primary ifds.

    The number of pyramid levels is derived from how many times the base
    image must be halved to fit in a single tile.  Each candidate
    sub-directory of directory 0 is then requested through
    ``self.getTiffDir`` and accepted only if its tile size and image size
    are consistent with being a power-of-two reduction of ``base``.

    :param base: base tiff directory.  Only its ``imageWidth``,
        ``imageHeight``, ``tileWidth``, and ``tileHeight`` attributes are
        read here.
    :returns: either False if no subifds are lower resolution, or a
        dictionary of levels (keys) and values that are subifd numbers.
        When a dictionary is returned, the highest level maps to 0.
    """
    try:
        # Levels needed so that the lowest resolution fits in one tile;
        # always at least 1.
        levels = int(max(0, math.ceil(max(
            math.log(float(base.imageWidth) / base.tileWidth),
            math.log(float(base.imageHeight) / base.tileHeight)) / math.log(2))) + 1)
        filled = {}
        # Walk from the level just below full resolution down to level 0;
        # sub-directory number N is expected to be reduced by 2 ** N.
        for z in range(levels - 2, -1, -1):
            subdir = levels - 1 - z
            scale = int(2 ** subdir)
            try:
                subifd = self.getTiffDir(0, mustBeTiled=True, subDirectoryNum=subdir)
            except Exception:
                # Deliberately best-effort: any failure to obtain this
                # sub-directory just means the level is not available.
                continue
            if (subifd is not None and
                    # Tiles must match the base tile size, or the whole
                    # reduced image must be a single tile.
                    (subifd.tileWidth == base.tileWidth or
                     subifd.tileWidth == subifd.imageWidth) and
                    (subifd.tileHeight == base.tileHeight or
                     subifd.tileHeight == subifd.imageHeight) and
                    # Scaled-up size may differ from the base by at most
                    # `scale` pixels to allow for rounding when reducing.
                    abs(subifd.imageWidth * scale - base.imageWidth) <= scale and
                    abs(subifd.imageHeight * scale - base.imageHeight) <= scale):
                filled[z] = subdir
        if not filled:
            return False
        # The full-resolution level is recorded with sub-directory number 0.
        filled[levels - 1] = 0
        return filled
    except TiffError:
        return False

def _parseOMEInfo(self): # noqa
if isinstance(self._omeinfo['Image'], dict):
self._omeinfo['Image'] = [self._omeinfo['Image']]
Expand Down Expand Up @@ -241,6 +282,33 @@ def _parseOMEInfo(self): # noqa
for entry in self._omebase['TiffData']}) > 1:
msg = 'OME Tiff references multiple files'
raise TileSourceError(msg)
if (len(self._omebase['TiffData']) ==
int(self._omebase['SizeT']) * int(self._omebase['SizeZ'])):
self._omebase['SizeC'] = 1
# DWM:: others are probably associated images
for img in self._omeinfo['Image'][1:]:
try:
if img['Name'] and img['Pixels']['TiffData'][0]['IFD']:
self._addAssociatedImage(
int(img['Pixels']['TiffData'][0]['IFD']),
None, None, img['Name'].split()[0])
except Exception:
pass
elif len(self._omeinfo['Image']) > 1:
multiple = False
for img in self._omeinfo['Image'][1:]:
try:
bpix = self._omeinfo['Image'][0]['Pixels']
imgpix = img['Pixels']
if imgpix['SizeX'] == bpix['SizeX'] and imgpix['SizeY'] == bpix['SizeY']:
multiple = True
break
except Exception:
multiple = True
if multiple:
# We should handle this as SizeXY
msg = 'OME Tiff references multiple images'
raise TileSourceError(msg)
if (len(self._omebase['TiffData']) != int(self._omebase['SizeC']) *
int(self._omebase['SizeT']) * int(self._omebase['SizeZ']) or
len(self._omebase['TiffData']) != len(
Expand Down
5 changes: 4 additions & 1 deletion sources/pil/large_image_source_pil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import math
import os
import threading
import warnings

import numpy as np
import PIL.Image
Expand Down Expand Up @@ -56,6 +57,8 @@
# package is not installed
pass

warnings.filterwarnings('ignore', category=UserWarning, module='.*PIL.*')

# Default to ignoring files with some specific extensions.
config.ConfigValues['source_pil_ignored_names'] = \
r'(\.mrxs|\.vsi)$'
Expand Down Expand Up @@ -138,7 +141,7 @@ def __init__(self, path, maxSize=None, **kwargs): # noqa
if self._pilImage is None:
try:
self._pilImage = PIL.Image.open(largeImagePath)
except OSError:
except (OSError, ValueError):
if not os.path.isfile(largeImagePath):
raise TileSourceFileNotFoundError(largeImagePath) from None
msg = 'File cannot be opened via PIL.'
Expand Down
30 changes: 22 additions & 8 deletions sources/tiff/large_image_source_tiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,12 +327,19 @@ def _initWithTiffTools(self): # noqa
self._info = info
frames = []
associated = [] # for now, a list of directories
used_subifd = False
for idx, ifd in enumerate(info['ifds']):
# if not tiles, add to associated images
if tifftools.Tag.tileWidth.value not in ifd['tags']:
associated.append(idx)
associated.append((idx, False))
continue
level = self._levelFromIfd(ifd, info['ifds'][0])
try:
level = self._levelFromIfd(ifd, info['ifds'][0])
except TileSourceError:
if idx and used_subifd:
associated.append((idx, True))
continue
raise
# if the same resolution as the main image, add a frame
if level == self.levels - 1:
frames.append({'dirs': [None] * self.levels})
Expand Down Expand Up @@ -371,9 +378,13 @@ def _initWithTiffTools(self): # noqa
tifftools.Tag.TileOffsets.value not in subifds[0]['tags']):
msg = 'Subifd has no strip or tile offsets.'
raise TileSourceMalformedError(msg)
level = self._levelFromIfd(subifds[0], info['ifds'][0])
try:
level = self._levelFromIfd(subifds[0], info['ifds'][0])
except Exception:
break
if level < self.levels - 1 and frames[-1]['dirs'][level] is None:
frames[-1]['dirs'][level] = (idx, subidx + 1)
used_subifd = True
else:
msg = 'Tile layers are in a surprising order'
raise TileSourceError(msg)
Expand Down Expand Up @@ -407,8 +418,8 @@ def _initWithTiffTools(self): # noqa
self._iccprofiles.append(ifd['tags'][
tifftools.Tag.ICCProfile.value]['data'])
self._associatedImages = {}
for dirNum in associated:
self._addAssociatedImage(dirNum)
for dirNum, isTiled in associated:
self._addAssociatedImage(dirNum, isTiled)
self._frames = frames
self._tiffDirectories = [
self.getTiffDir(
Expand Down Expand Up @@ -490,7 +501,7 @@ def _checkForVendorSpecificTags(self):
frame.setdefault('frame', {})
frame['frame']['IndexC'] = idx

def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None):
def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None, imageId=None):
"""
Check if the specified TIFF directory contains an image with a sensible
image description that can be used as an ID. If so, and if the image
Expand All @@ -501,6 +512,7 @@ def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None):
untiled images.
:param topImage: if specified, add image-embedded metadata to this
image.
:param imageId: if specified, use this as the image name.
"""
try:
associated = self.getTiffDir(directoryNum, mustBeTiled)
Expand All @@ -514,6 +526,8 @@ def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None):
id = 'dir%d' % directoryNum
if not len(self._associatedImages):
id = 'macro'
if imageId:
id = imageId
if not id and not mustBeTiled:
id = {1: 'label', 9: 'macro'}.get(associated._tiffInfo.get('subfiletype'))
if not isinstance(id, str):
Expand Down Expand Up @@ -765,7 +779,7 @@ def getAssociatedImagesList(self):
"""
imageList = set(self._associatedImages)
for td in self._tiffDirectories:
if td is not None:
if td is not None and td is not False:
imageList |= set(td._embeddedImages)
return sorted(imageList)

Expand All @@ -784,7 +798,7 @@ def _getAssociatedImage(self, imageKey):
# with seemingly bad associated images, we may need to read them with a
# more complex process than read_image.
for td in self._tiffDirectories:
if td is not None and imageKey in td._embeddedImages:
if td is not None and td is not False and imageKey in td._embeddedImages:
return PIL.Image.open(io.BytesIO(base64.b64decode(td._embeddedImages[imageKey])))
if imageKey in self._associatedImages:
return PIL.Image.fromarray(self._associatedImages[imageKey])
Expand Down
8 changes: 5 additions & 3 deletions sources/tiff/large_image_source_tiff/tiff_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,11 +788,13 @@ def getTile(self, x, y, asarray=False):

if (not self._tiffInfo.get('istiled') or
self._tiffInfo.get('compression') not in {
libtiff_ctypes.COMPRESSION_JPEG, 33003, 33005, 34712} or
libtiff_ctypes.COMPRESSION_JPEG, 33003, 33004, 33005, 34712} or
self._tiffInfo.get('bitspersample') != 8 or
self._tiffInfo.get('sampleformat') not in {
None, libtiff_ctypes.SAMPLEFORMAT_UINT} or
(asarray and self._tiffInfo.get('compression') not in {33003, 33005, 34712} and (
(asarray and self._tiffInfo.get('compression') not in {
33003, 33004, 33005, 34712,
} and (
self._tiffInfo.get('compression') != libtiff_ctypes.COMPRESSION_JPEG or
self._tiffInfo.get('photometric') != libtiff_ctypes.PHOTOMETRIC_YCBCR))):
return self._getUncompressedTile(tileNum)
Expand All @@ -811,7 +813,7 @@ def getTile(self, x, y, asarray=False):
# Get the whole frame, which is in a JPEG or JPEG 2000 format
frame = self._getJpegFrame(tileNum, True)
# For JP2K, see if we can convert it faster than PIL
if self._tiffInfo.get('compression') in {33003, 33005}:
if self._tiffInfo.get('compression') in {33003, 33004, 33005, 34712}:
try:
import openjpeg

Expand Down
4 changes: 4 additions & 0 deletions test/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@
# Synthetic Indica Labs tiff; subifds missing tile/strip data and unmarked
# float32 pixels rather than uint32
'synthetic_indica.tiff': 'sha512:fba7eb2fb5fd12ac242d8b0760440f170f48f9e2434a672cbf230bd8a9ff02fad8f9bdf7225edf2de244f412edfc5205e695031a1d43dd99fe31c3aca11909a1', # noqa
# Converted from the TCGA svs file using bioformats java program and
# --rgb --quality=0.015 --compression='JPEG-2000 Lossy' parameters to make
# the file small
'TCGA-55-8207-01Z-00-DX1.ome.tiff': 'sha512:50cf63f0e8bfa3054d3532b7dd0237b66aeb4c7609da874639a28bc068dbd157f786e84d3eb76a3b0e6636a042c56c3b96d3be2ad66f7589d0542a5d20cecdb4', # noqa
}


Expand Down
4 changes: 2 additions & 2 deletions test/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def testConvertJp2kCompression(tmpdir):
image, _ = source.getRegion(
output={'maxWidth': 200, 'maxHeight': 200}, format=constants.TILE_FORMAT_NUMPY)
# Without or with icc adjustment
assert ((image[12][167] == [215, 135, 172]).all() or
(image[12][167] == [216, 134, 172]).all())
assert ((image[12][167][:3] == [215, 135, 172]).all() or
(image[12][167][:3] == [216, 134, 172]).all())

outputPath2 = os.path.join(tmpdir, 'out2.tiff')
large_image_converter.convert(imagePath, outputPath2, compression='jp2k', psnr=50)
Expand Down
4 changes: 2 additions & 2 deletions test/test_source_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@
'openslide': {
'read': r'\.(ptif|svs|ndpi|tif.*|qptiff|dcm)$',
'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled|indica|tcia.*dcm)', # noqa
'skip': r'nokeyframe\.ome\.tiff$',
'skip': r'nokeyframe\.ome\.tiff|TCGA-55.*\.ome\.tiff$',
'skipTiles': r'one_layer_missing',
},
'pil': {
'read': r'(\.(jpg|jpeg|png|tif.*)|18[-0-9a-f]{34}\.dcm)$',
'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview|indica)', # noqa
'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview|indica|TCGA-55.*\.ome\.tiff)', # noqa
},
'rasterio': {
'read': r'(\.(jpg|jpeg|jp2|ptif|scn|svs|ndpi|tif.*|qptiff)|18[-0-9a-f]{34}\.dcm)$',
Expand Down

0 comments on commit 96c5cea

Please sign in to comment.