Skip to content

Commit

Permalink
0.4 added
Browse files Browse the repository at this point in the history
  • Loading branch information
braysia authored and braysia committed Nov 16, 2017
2 parents d16afec + d2bcb22 commit 3d12a2f
Show file tree
Hide file tree
Showing 66 changed files with 1,327 additions and 134 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
[submodule "celltk/labeledarray"]
path = celltk/labeledarray
url = https://github.com/braysia/labeledarray
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@ where
- labels: np.ndarray[np.int16] (e.g. nuclear objects)
\* tracked objects have consistent values over frames

For each processes, you can find a module named ___\*\_operation.py___. (e.g. _celltk/preprocess_operations.py_).
These files simply contain a list of functions which takes an input and convert images.

For each process, you can find a module named ___\*\_operations.py___ (e.g. _celltk/preprocess_operations.py_).

These files are the "repositories" of functions.
They simply contain a list of functions that take an input and convert images. If you need a new function, simply add it there.


Raw input images may be TIFF or PNG files with various data types.

Expand Down
37 changes: 33 additions & 4 deletions celltk/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
need to deal with parent id
"""

from scipy.ndimage import imread
from utils.util import imread
import argparse
import tifffile as tiff
from os.path import basename, join, dirname, abspath
Expand All @@ -16,17 +16,44 @@
except:
from celltk.labeledarray import LabeledArray
from os.path import exists
from utils.file_io import make_dirs
from utils.file_io import make_dirs, lbread
import pandas as pd
import logging
from scipy.ndimage.morphology import binary_fill_holes
from scipy.ndimage.morphology import binary_dilation

logger = logging.getLogger(__name__)


PROP_SAVE = ['area', 'cell_id', 'convex_area', 'cv_intensity',
'eccentricity', 'major_axis_length', 'minor_axis_length', 'max_intensity',
'mean_intensity', 'median_intensity', 'min_intensity', 'orientation',
'perimeter', 'solidity', 'std_intensity', 'total_intensity', 'x', 'y']
'perimeter', 'solidity', 'std_intensity', 'total_intensity', 'x', 'y', 'parent', 'num_seg']


def find_all_children(labels):
    """Return the label values found inside regions enclosed by negative labels.

    Pixels with a negative label are taken as a mask. The holes of that mask
    are filled with ``binary_fill_holes`` and the negative pixels themselves
    are then removed again, leaving only the enclosed interior. The unique
    label values located there are returned as a list.
    """
    negative = labels < 0
    interior = binary_fill_holes(negative) & ~negative
    return np.unique(labels[interior]).tolist()


def find_parent_label(labels, child_label):
    """Return the label that directly surrounds the object ``child_label``.

    The child's pixels are dilated by one step with ``binary_dilation`` and
    the child itself is removed, leaving the ring of pixels bordering it.
    Exactly one label value must be present in that ring (checked with an
    assertion); that value is returned.
    """
    child = labels == child_label
    border = binary_dilation(child) & ~child
    surrounding = labels[border]
    assert len(np.unique(surrounding)) == 1
    return surrounding[0]


def add_parent(cells, labels):
    """Set ``parent`` on every cell that lies inside a negatively labelled region.

    For each child label reported by ``find_all_children`` the surrounding
    label is looked up with ``find_parent_label`` and its absolute value is
    stored as ``parent`` on the one cell whose ``label`` equals the child
    label (exactly one such cell must exist; this is asserted).

    Returns the same ``cells`` sequence, modified in place.
    """
    for child_label in find_all_children(labels):
        surrounding = find_parent_label(labels, child_label)
        matched = [candidate for candidate in cells
                   if candidate.label == child_label]
        assert len(matched) == 1
        (child, ) = matched
        child.parent = abs(surrounding)
    return cells



# def add_parent_id(labels, img, cells):
Expand Down Expand Up @@ -83,8 +110,10 @@ def caller(inputs_list, inputs_labels_list, output, primary, secondary):
for inputs_labels, obj in zip(inputs_labels_list, obj_names):
logger.info("Channel {0}: {1} applied...".format(ch, obj))
for frame, (path, pathl) in enumerate(zip(inputs, inputs_labels)):
img, labels = imread(path), tiff.imread(pathl).astype(np.int32)
img, labels = imread(path), lbread(pathl, nonneg=False)
cells = regionprops(labels, img)
if (labels < 0).any():
cells = add_parent(cells, labels)
[setattr(cell, 'frame', frame) for cell in cells]
cells = [Cell(cell) for cell in cells]
store.append(cells)
Expand Down
40 changes: 28 additions & 12 deletions celltk/caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import yaml
import multiprocessing
from utils.file_io import make_dirs
import sys

logger = logging.getLogger(__name__)

Expand All @@ -18,23 +19,31 @@ def extract_path(path):
return f


def prepare_path_list(inputs, outputdir):
def parse_lazy_syntax(inputs, outputdir):
if isinstance(inputs, str):
in0 = glob(inputs)
in0 = sorted(glob(inputs))
if not in0:
in0 = glob(join(outputdir, inputs))
in0 = sorted(glob(join(outputdir, inputs)))
if isdir(in0[0]):
in0 = glob(join(in0[0], '*'))
in0 = sorted(glob(join(in0[0], '*')))
elif isinstance(inputs, list):
if all([exists(i) for i in inputs]):
return inputs
in0 = zip(*[glob(i) for i in inputs])
in0 = zip(*[sorted(glob(i)) for i in inputs])
if not in0:
in0 = zip(*[glob(join(i, '*')) for i in inputs])
in0 = zip(*[sorted(glob(join(i, '*'))) for i in inputs])
if not in0:
in0 = zip(*[extract_path(join(outputdir, i)) for i in inputs])
# if not in0:
# in0 = zip(*[glob(join(outputdir, i, '*')) for i in inputs])
in0 = zip(*[sorted(extract_path(join(outputdir, i))) for i in inputs])
return in0


def prepare_path_list(inputs, outputdir):
    """Resolve ``inputs`` with ``parse_lazy_syntax``, exiting if nothing is found.

    An ``IndexError`` raised by ``parse_lazy_syntax`` is treated as "no image
    matched": the message is logged and printed, and the process exits with
    status 1. Otherwise the resolved value is returned unchanged.
    """
    try:
        return parse_lazy_syntax(inputs, outputdir)
    except IndexError:
        message = "Images \"{0}\" not found. Check your path".format(inputs)
        logger.info(message)
        # Single-argument parenthesised form prints the same text on Python 2.
        print(message)
        sys.exit(1)


Expand Down Expand Up @@ -72,6 +81,8 @@ def _retrieve_caller_based_on_function(function):
def run_operation(output_dir, operation):
functions, params, images, labels, output = parse_operation(operation)
inputs = prepare_path_list(images, output_dir)
logger.info(inputs)

inputs_labels = prepare_path_list(labels, output_dir)
output = join(output_dir, output) if output else output_dir
caller = _retrieve_caller_based_on_function(functions[0])
Expand Down Expand Up @@ -99,23 +110,28 @@ def load_yaml(path):

def single_call(inputs):
    """Read the argument file at ``inputs`` with ``load_yaml`` and run it.

    The parsed contents are handed straight to ``call_operations``.
    """
    call_operations(load_yaml(inputs))


def call_operations(contents):
    """Run every operation described by an already-parsed argument mapping.

    contents: mapping with an ``'OUTPUT_DIR'`` entry (the directory is created
    and receives ``log.txt``) and an ``'operations'`` entry that is passed to
    ``run_operations``.
    """
    output_dir = contents['OUTPUT_DIR']
    make_dirs(output_dir)
    logging.basicConfig(filename=join(output_dir, 'log.txt'), level=logging.DEBUG)
    # Raise PIL's logger to WARNING so its DEBUG/INFO records stay out of the log.
    logging.getLogger("PIL").setLevel(logging.WARNING)

    # Only the parsed contents are in scope here (the argument file path is
    # known to the caller, not to this function), so those are what is logged.
    logger.debug('INPUT:\n{0}'.format(contents))
    run_operations(output_dir, contents['operations'])
    logger.info("Caller finished.")


def main():
def parse_args():
    """Parse the command line.

    Recognised arguments are ``-n/--cores`` (int, default 1) and zero or more
    positional ``input`` argument-file paths. Returns the argparse namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n", "--cores",
        type=int,
        default=1,
        help="number of cores for multiprocessing",
    )
    parser.add_argument("input", nargs="*", help="input argument file path")
    return parser.parse_args()


def main():
args = parse_args()
if len(args.input) == 1:
single_call(args.input[0])
if len(args.input) > 1:
Expand Down
1 change: 0 additions & 1 deletion celltk/labeledarray
Submodule labeledarray deleted from 39d847
21 changes: 21 additions & 0 deletions celltk/labeledarray/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2017 braysia

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
27 changes: 27 additions & 0 deletions celltk/labeledarray/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# LabeledArray

Numpy array subclass for indexing by strings.

Using a multi-index in pandas sometimes causes complications around "copies vs. views". This array keeps numpy.ndarray's behavior while still allowing the array to be sliced by strings.

Underlying data can be 2D, 3D or N-dimensional array. First dimension will be used for labels (multi-index).

```
arr = np.zeros((3, 20, 100))
labels = np.array([['nuc' ,'area', ''],
['nuc' ,'FITC' , 'min_intensity'],
['nuc' ,'FITC' , 'max_intensity']], dtype=object)
larr = LabeledArray(arr, labels)
print larr.shape
print larr['nuc', 'FITC'].shape
print larr['nuc', 'FITC', 'max_intensity'].shape
```

The extra attributes including labels are automatically saved and loaded with the array.
```
larr = LabeledArray(arr, labels)
larr.time = np.arange(arr.shape[-1])
larr.save('temp')
new_larr = LabeledArray().load('temp')
print new_larr.time
```
1 change: 1 addition & 0 deletions celltk/labeledarray/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from labeledarray.labeledarray import LabeledArray
Empty file.
163 changes: 163 additions & 0 deletions celltk/labeledarray/labeledarray/labeledarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""
TODO: check if labels is unique.
"""
import numpy as np
from collections import OrderedDict
from utils import sort_labels_and_arr, uniform_list_length


class LabeledArray(np.ndarray):
    """ndarray subclass whose first axis can be indexed by string labels.

    Each row of ``labels`` corresponds to one entry along the first axis of
    the data; the remaining axes hold the data itself (e.g. cells). The
    underlying data can be an N-dimensional array. Indexing with one or more
    strings selects the rows whose leading label columns match, and the
    result keeps only the label columns that are not shared by all selected
    rows.

    Examples:
        >> arr = np.arange(12).reshape((3, 2, 2))
        >> labelarr = np.array([['a1', 'b1', ''],
                                ['a1', 'b2', 'c1'],
                                ['a1', 'b2', 'c2']], dtype=object)
        >> darr = LabeledArray(arr, labelarr)
        >> darr['a1'].shape
        (3, 2, 2)
        >> darr['a1', 'b1'].shape
        (2, 2)
        >> darr['a1', 'b2', 'c1']
        LabeledArray([[4, 5],
                      [6, 7]])
    """

    # Index of the __getitem__ call currently in progress. __array_finalize__
    # reads it from the parent array to slice ``labels`` like the data.
    idx = None
    # 2-D object array of label rows, or None when the array is unlabeled.
    labels = None

    def __new__(cls, arr=None, labels=None, idx=None):
        """Create a labeled view of ``arr``.

        With ``arr=None`` an empty placeholder instance is returned.
        Otherwise ``labels`` and ``arr`` are passed through
        ``sort_labels_and_arr`` and list-like labels are converted to an
        object array via ``uniform_list_length``.
        """
        if arr is None:
            return np.asarray(arr).view(cls)
        labels, arr = sort_labels_and_arr(labels, arr)
        if labels is not None and not isinstance(labels, np.ndarray):
            labels = np.array(uniform_list_length(labels), dtype=object)
        obj = np.asarray(arr).view(cls)
        obj.labels = labels
        obj.idx = idx
        return obj

    def __array_finalize__(self, obj):
        """Propagate ``labels`` from ``obj`` to a newly created array.

        When ``obj`` carries a pending index (set by ``__getitem__``), the
        label rows are sliced with the first-axis part of that index and the
        leading columns shared by every remaining row are dropped.
        """
        if obj is None:
            return
        self.labels = getattr(obj, 'labels', None)
        if self.labels is None:
            return
        if not hasattr(obj, 'idx') or self.ndim < 1:
            return
        pending = obj.idx
        if pending is None:
            return
        if isinstance(pending, tuple):
            if not pending:
                # ``arr[()]`` selects everything; labels stay as they are.
                return
            # Only the first-axis component of the index selects label rows.
            row_key = pending[0]
        else:
            # Plain int, numpy integer, slice, list, ... applies to axis 0.
            row_key = pending
        self.labels = self.labels[row_key]
        if isinstance(self.labels, str):
            return
        if self.labels.ndim > 1:
            shared = np.all(self.labels == self.labels[0, :], axis=0)
            # Strip shared columns only when they form a leading run
            # (all True entries come before the first False).
            is_leading_run = all(x >= y for x, y in zip(shared, shared[1:]))
            n_shared = shared.sum() if is_leading_run else 0
            self.labels = self.labels[:, slice(n_shared, None)]
        if self.labels.ndim == 1:
            # A single label row is left: the result is no longer labeled.
            self.labels = None

    def __getitem__(self, item):
        """Index by strings (labels) or by any regular numpy index.

        The result is squeezed, so length-1 axes are removed.
        """
        if isinstance(item, str):
            item = self._label2idx(item)
        if isinstance(item, tuple) and item and isinstance(item[0], str):
            item = self._label2idx(item)
        # __array_finalize__ reads ``self.idx`` while the result is being
        # built. Reset it afterwards so that arrays derived later (e.g. by
        # arithmetic) do not have their labels sliced by a stale index.
        self.idx = item
        try:
            ret = super(LabeledArray, self).__getitem__(item)
        finally:
            self.idx = None
        return ret.squeeze()

    def _label2idx(self, item):
        """Translate a label (string or tuple of strings) into a numpy index.

        Returns a 1-tuple with the row number when exactly one row matches,
        otherwise a tuple of slices covering the span from the first to the
        last matching row. Matching rows are assumed to be contiguous.

        Raises:
            ValueError: if no row matches ``item``.
        """
        item = item if isinstance(item, tuple) else (item, )
        matched = np.ones(self.labels.shape[0], dtype=bool)
        for level, name in enumerate(item):
            matched = matched * (self.labels[:, level] == name)
        rows = np.where(matched)[0]
        trailing = (slice(None, None, None), ) * (self.ndim - 1)
        if rows.size == 1:
            return tuple(rows)
        if matched.all():
            return (slice(None, None, None), ) + trailing
        if rows.size == 0:
            raise ValueError("no label matches {0!r}".format(item))
        first, last = int(rows.min()), int(rows.max())
        start = first if first > 0 else None
        # A slice end is exclusive, so step one past the last matching row.
        stop = last + 1 if last < self.shape[0] - 1 else None
        return (slice(start, stop, None), ) + trailing

    def vstack(self, larr):
        """merging first dimension (more labels)
        """
        if self.ndim > larr.ndim:
            larr = np.expand_dims(larr, axis=0)
        return LabeledArray(np.vstack((self, larr)), np.vstack((self.labels, larr.labels)))

    def hstack(self, larr):
        """merging second dimension (more cells)

        Returns None when the labels of the two arrays differ.
        """
        if (self.labels == larr.labels).all():
            return LabeledArray(np.hstack((self, larr)), self.labels)

    def save(self, file_name):
        """Save data, labels and extra instance attributes with
        ``np.savez_compressed``.
        """
        # Attributes present on the instance but not on the class are the
        # user-added extras (e.g. ``time``).
        extra_fields = set(dir(self)).difference(set(dir(self.__class__)))
        data = dict(arr=self, labels=self.labels)
        for ef in extra_fields:
            data[ef] = getattr(self, ef)
        np.savez_compressed(file_name, **data)

    @classmethod
    def load(cls, file_name):
        """Load an array written by ``save``; ``.npz`` is appended if missing.

        Every stored entry other than ``arr`` and ``labels`` is restored as
        an attribute of the returned array.
        """
        if not file_name.endswith('.npz'):
            file_name = file_name + '.npz'
        # NOTE: labels are stored as an object array, i.e. pickled, so
        # allow_pickle is required on NumPy >= 1.16.3. Unpickling can execute
        # arbitrary code: only load files from trusted sources.
        with np.load(file_name, allow_pickle=True) as f:
            la = cls(f['arr'], f['labels'])
            for key in f.files:
                if key not in ('arr', 'labels'):
                    setattr(la, key, f[key])
        return la


if __name__ == "__main__":
    # Self-test for LabeledArray; run this module as a script to execute it.
    import os
    import shutil
    import tempfile

    # Check 2D.
    arr = np.random.rand(3, 100)
    labelarr = np.array([['a1', 'b1', ''],
                         ['a1', 'b2', 'c1'],
                         ['a1', 'b2', 'c2']], dtype=object)
    darr = LabeledArray(arr, labelarr)
    assert darr['a1'].shape == (3, 100)
    assert darr['a1', 'b1'].shape == (100, )
    assert darr['a1', 'b2'].shape == (2, 100)
    assert darr['a1', 'b2', 'c1'].shape == (100, )

    # check 3D.
    arr = np.arange(12).reshape((3, 2, 2))
    labelarr = np.array([['a1', 'b1', ''],
                         ['a1', 'b2', 'c1'],
                         ['a1', 'b2', 'c2']], dtype=object)
    darr = LabeledArray(arr, labelarr)
    assert darr['a1'].shape == (3, 2, 2)
    assert darr['a1', 'b1'].shape == (2, 2)
    assert darr['a1', 'b2'].shape == (2, 2, 2)
    assert darr['a1', 'b2', 'c1'].shape == (2, 2)
    assert darr.shape == (3, 2, 2)
    assert darr[1:, :, :].shape == (2, 2, 2)
    assert darr[1, :, :].shape == (2, 2)
    assert np.all(darr['a1', 'b2'].labels == np.array([['c1'], ['c2']]))

    # can save and load extra fields. add "time" for example.
    darr.time = np.arange(darr.shape[-1])
    # Round-trip through a throwaway directory so that running the self-test
    # does not leave a ``test.npz`` behind in the source tree.
    tmpdir = tempfile.mkdtemp()
    try:
        path = os.path.join(tmpdir, 'test.npz')
        darr.save(path)
        cc = LabeledArray.load(path)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
    assert cc.time.shape == (2,)
    cc[0:2, :, :]
    cc['a1', 'b1'][0, 0] = 100
    assert np.sum(cc == 100) == 1

    assert darr.vstack(darr).shape == (2 * darr.shape[0], darr.shape[1], darr.shape[2])
    assert darr.hstack(darr).shape == (darr.shape[0], 2 * darr.shape[1], darr.shape[2])


Binary file added celltk/labeledarray/labeledarray/temp.npz
Binary file not shown.
Binary file added celltk/labeledarray/labeledarray/test.npz
Binary file not shown.
Loading

0 comments on commit 3d12a2f

Please sign in to comment.