0.4 added

CovertLab · Nov 16, 2017 · 3d12a2f · 3d12a2f
2 parents d16afec + d2bcb22
commit 3d12a2f
Show file tree

Hide file tree

Showing 66 changed files with 1,327 additions and 134 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "celltk/labeledarray"]
-	path = celltk/labeledarray
-	url = https://github.com/braysia/labeledarray

diff --git a/README.md b/README.md
@@ -17,8 +17,12 @@ where
 - labels: np.ndarray[np.int16] (e.g. nuclear objects)  
 \* tracked objects have consistent values over frames
 
-For each processes, you can find a module named ___\*\_operation.py___. (e.g. _celltk/preprocess_operations.py_).  
-These files simply contain a list of functions which takes an input and convert images.  
+
+For each process, you can find a module named ___\*\_operations.py___ (e.g. _celltk/preprocess_operations.py_).  
+
+These files are the "repositories" of functions.  
+Each one simply contains a list of functions that take an input and convert images. If you need a new function, simply add it there.
+
 
 When you input a raw image, it should take TIFF or PNG files with various datatypes as well.
 

diff --git a/celltk/apply.py b/celltk/apply.py
@@ -5,7 +5,7 @@
 need to deal with parent id
 """
 
-from scipy.ndimage import imread
+from utils.util import imread
 import argparse
 import tifffile as tiff
 from os.path import basename, join, dirname, abspath
@@ -16,17 +16,44 @@
 except:
     from celltk.labeledarray import LabeledArray
 from os.path import exists
-from utils.file_io import make_dirs
+from utils.file_io import make_dirs, lbread
 import pandas as pd
 import logging
+from scipy.ndimage.morphology import binary_fill_holes
+from scipy.ndimage.morphology import binary_dilation
 
 logger = logging.getLogger(__name__)
 
 
 PROP_SAVE = ['area', 'cell_id', 'convex_area', 'cv_intensity',
              'eccentricity', 'major_axis_length', 'minor_axis_length', 'max_intensity',
              'mean_intensity', 'median_intensity', 'min_intensity', 'orientation',
-             'perimeter', 'solidity', 'std_intensity', 'total_intensity', 'x', 'y']
+             'perimeter', 'solidity', 'std_intensity', 'total_intensity', 'x', 'y', 'parent', 'num_seg']
+
+
+def find_all_children(labels):
+    """Return the sorted unique label values enclosed by negative-labelled pixels.
+
+    The negative pixels are hole-filled; whatever the fill adds (i.e. pixels
+    surrounded by negative labels but not negative themselves) is treated as
+    "children", and their distinct label values are returned as a list.
+    """
+    negative = labels < 0
+    enclosed = binary_fill_holes(negative)
+    # Keep only the filled-in interior, not the negative pixels themselves.
+    enclosed[negative] = False
+    return np.unique(labels[enclosed]).tolist()
+
+
+def find_parent_label(labels, child_label):
+    """Return the single label value that borders the object ``child_label``.
+
+    The child's pixels are dilated by one step and the child itself is removed,
+    leaving a one-pixel rim. The rim must contain exactly one distinct label
+    (asserted), and that label is returned.
+    """
+    child = labels == child_label
+    rim = binary_dilation(child)
+    rim[child] = False
+    neighbours = labels[rim]
+    assert len(np.unique(neighbours)) == 1
+    return neighbours[0]
+
+
+def add_parent(cells, labels):
+    """Set ``parent`` on every cell whose label is enclosed by a negative label.
+
+    For each child label found in ``labels`` the bordering (parent) label is
+    looked up and its absolute value is stored on the one cell carrying that
+    child label. ``cells`` is modified in place and returned.
+    """
+    for child_label in find_all_children(labels):
+        parent_label = find_parent_label(labels, child_label)
+        matches = [cell for cell in cells if cell.label == child_label]
+        # Exactly one cell is expected per child label.
+        assert len(matches) == 1
+        (child,) = matches
+        child.parent = abs(parent_label)
+    return cells
+
 
 
 # def add_parent_id(labels, img, cells):
@@ -83,8 +110,10 @@ def caller(inputs_list, inputs_labels_list, output, primary, secondary):
         for inputs_labels, obj in zip(inputs_labels_list, obj_names):
             logger.info("Channel {0}: {1} applied...".format(ch, obj))
             for frame, (path, pathl) in enumerate(zip(inputs, inputs_labels)):
-                img, labels = imread(path), tiff.imread(pathl).astype(np.int32)
+                img, labels = imread(path), lbread(pathl, nonneg=False)
                 cells = regionprops(labels, img)
+                if (labels < 0).any():
+                    cells = add_parent(cells, labels)
                 [setattr(cell, 'frame', frame) for cell in cells]
                 cells = [Cell(cell) for cell in cells]
                 store.append(cells)

diff --git a/celltk/caller.py b/celltk/caller.py
@@ -7,6 +7,7 @@
 import yaml
 import multiprocessing
 from utils.file_io import make_dirs
+import sys
 
 logger = logging.getLogger(__name__)
 
@@ -18,23 +19,31 @@ def extract_path(path):
     return f
 
 
-def prepare_path_list(inputs, outputdir):
+def parse_lazy_syntax(inputs, outputdir):
+    # Resolve a loosely specified ``inputs`` into concrete paths.
+    #   str  -> sorted glob matches, tried as given and then relative to
+    #           ``outputdir``; if the first match is a directory, its sorted
+    #           contents are used instead.
+    #   list -> returned unchanged when every entry exists; otherwise the
+    #           per-entry sorted matches are zipped together, with fallbacks.
+    # NOTE(review): any other type leaves ``in0`` unassigned at the final return.
     if isinstance(inputs, str):
-        in0 = glob(inputs)
+        in0 = sorted(glob(inputs))
         if not in0:
-            in0 = glob(join(outputdir, inputs))
+            in0 = sorted(glob(join(outputdir, inputs)))
+        # in0[0] raises IndexError when nothing matched; prepare_path_list catches it.
         if isdir(in0[0]):
-            in0 = glob(join(in0[0], '*'))
+            in0 = sorted(glob(join(in0[0], '*')))
     elif isinstance(inputs, list):
         if all([exists(i) for i in inputs]):
             return inputs
-        in0 = zip(*[glob(i) for i in inputs])
+        in0 = zip(*[sorted(glob(i)) for i in inputs])
+        # NOTE(review): ``not in0`` assumes zip() returns a list (Python 2) -- confirm before porting.
         if not in0:
-            in0 = zip(*[glob(join(i, '*')) for i in inputs])
+            in0 = zip(*[sorted(glob(join(i, '*'))) for i in inputs])
         if not in0:
-            in0 = zip(*[extract_path(join(outputdir, i)) for i in inputs])
-        # if not in0:
-        #     in0 = zip(*[glob(join(outputdir, i, '*')) for i in inputs])
+            in0 = zip(*[sorted(extract_path(join(outputdir, i))) for i in inputs])
+    return in0
+
+
+def prepare_path_list(inputs, outputdir):
+    """Resolve ``inputs`` to a list of paths, exiting if nothing can be found.
+
+    Delegates to ``parse_lazy_syntax``. An ``IndexError`` from it means no
+    file matched; the problem is logged as an error, echoed to stderr and the
+    process exits with status 1.
+    """
+    try:
+        in0 = parse_lazy_syntax(inputs, outputdir)
+    except IndexError:
+        message = "Images \"{0}\" not found. Check your path".format(inputs)
+        logger.error(message)
+        # Plain stream write instead of the Python-2-only print statement;
+        # stderr keeps the diagnostic out of regular program output.
+        sys.stderr.write(message + "\n")
+        sys.exit(1)
     return in0
 
 
@@ -72,6 +81,8 @@ def _retrieve_caller_based_on_function(function):
 def run_operation(output_dir, operation):
     functions, params, images, labels, output = parse_operation(operation)
     inputs = prepare_path_list(images, output_dir)
+    logger.info(inputs)
+
     inputs_labels = prepare_path_list(labels, output_dir)
     output = join(output_dir, output) if output else output_dir
     caller = _retrieve_caller_based_on_function(functions[0])
@@ -99,23 +110,28 @@ def load_yaml(path):
 
 def single_call(inputs):
+    """Load the YAML file at ``inputs`` and run the operations it describes."""
     contents = load_yaml(inputs)
+    call_operations(contents)
 
+
+def call_operations(contents):
+    """Run every operation listed in an already-parsed configuration.
+
+    ``contents`` must provide ``'OUTPUT_DIR'`` and ``'operations'``. The output
+    directory is passed to ``make_dirs`` and logging is directed to ``log.txt``
+    inside it (DEBUG level, PIL messages limited to WARNING) before the
+    operations are run.
+    """
     make_dirs(contents['OUTPUT_DIR'])
     logging.basicConfig(filename=join(contents['OUTPUT_DIR'], 'log.txt'), level=logging.DEBUG)
     logging.getLogger("PIL").setLevel(logging.WARNING)
-
-    logger.debug('INPUT:\n{0}'.format(inputs))
     run_operations(contents['OUTPUT_DIR'], contents['operations'])
     logger.info("Caller finished.")
 
 
-def main():
+def parse_args():
+    """Parse the command line: ``-n/--cores`` (int, default 1) and zero or more ``input`` paths."""
     parser = argparse.ArgumentParser()
     parser.add_argument("-n", "--cores", help="number of cores for multiprocessing",
                         type=int, default=1)
     parser.add_argument("input", nargs="*", help="input argument file path")
     args = parser.parse_args()
+    return args
 
+
+def main():
+    args = parse_args()
     if len(args.input) == 1:
         single_call(args.input[0])
     if len(args.input) > 1:

diff --git a/celltk/labeledarray b/celltk/labeledarray
diff --git a/celltk/labeledarray/LICENSE b/celltk/labeledarray/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 braysia
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/celltk/labeledarray/README.md b/celltk/labeledarray/README.md
@@ -0,0 +1,27 @@
+# LabeledArray
+
+Numpy array subclass for indexing by strings.  
+
+Using a multi-index in pandas can cause complications around "copies vs. views". This class keeps numpy.ndarray's behavior while still allowing the array to be sliced by strings.
+
+The underlying data can be a 2D, 3D or N-dimensional array. The first dimension is used for the labels (multi-index).
+
+```
+arr = np.zeros((3, 20, 100))
+labels = np.array([['nuc' ,'area', ''],
+                   ['nuc' ,'FITC' , 'min_intensity'],
+                   ['nuc' ,'FITC' , 'max_intensity']], dtype=object)
+larr = LabeledArray(arr, labels)
+print larr.shape
+print larr['nuc', 'FITC'].shape
+print larr['nuc', 'FITC', 'max_intensity'].shape
+```
+
+The extra attributes including labels are automatically saved and loaded with the array. 
+```
+larr = LabeledArray(arr, labels)
+larr.time = np.arange(arr.shape[-1])
+larr.save('temp')
+new_larr = LabeledArray().load('temp')
+print new_larr.time
+```
diff --git a/celltk/labeledarray/__init__.py b/celltk/labeledarray/__init__.py
@@ -0,0 +1 @@
+from labeledarray.labeledarray import LabeledArray
diff --git a/celltk/labeledarray/labeledarray/__init__.py b/celltk/labeledarray/labeledarray/__init__.py
diff --git a/celltk/labeledarray/labeledarray/labeledarray.py b/celltk/labeledarray/labeledarray/labeledarray.py
@@ -0,0 +1,163 @@
+"""
+TODO: check if labels is unique.
+
+"""
+import numpy as np
+from collections import OrderedDict
+from utils import sort_labels_and_arr, uniform_list_length
+
+
+class LabeledArray(np.ndarray):
+    """
+    Numpy array subclass whose first dimension can be indexed by strings.
+
+    ``labels`` is a 2D object array with one row per entry along the first
+    axis of the data; all remaining axes behave like a plain ndarray.
+    A string (or tuple of strings) key is matched against the leading label
+    columns and translated into a single contiguous slice, so rows sharing a
+    label prefix must be adjacent.
+    NOTE(review): ``sort_labels_and_arr`` (imported from utils) presumably
+    establishes that ordering on construction -- confirm.
+
+    Examples:
+        >> arr = np.arange(12).reshape((3, 2, 2))
+        >> labelarr = np.array([['a1', 'b1', ''],
+                                ['a1', 'b2', 'c1'],
+                                ['a1', 'b2', 'c2']], dtype=object)
+        >> larr = LabeledArray(arr, labelarr)
+        >> larr['a1'].shape
+        (3, 2, 2)
+        >> larr['a1', 'b1'].shape
+        (2, 2)
+        >> larr['a1', 'b2', 'c1']
+        LabeledArray([[4, 5],
+                      [6, 7]])
+    """
+
+    # Index used by the most recent __getitem__; read by __array_finalize__.
+    idx = None
+    # 2D object array of label rows (or None when the array is unlabeled).
+    labels = None
+
+    def __new__(cls, arr=None, labels=None, idx=None):
+        """Create a labeled view of ``arr``; with no ``arr`` an empty placeholder is returned."""
+        if arr is None:
+            return np.asarray(arr).view(cls)
+        labels, arr = sort_labels_and_arr(labels, arr)
+        # Non-array labels (e.g. nested lists) are converted to an object array.
+        if not isinstance(labels, np.ndarray) and labels is not None:
+            labels = np.array(uniform_list_length(labels), dtype=object)
+        obj = np.asarray(arr).view(cls)
+        obj.labels = labels
+        obj.idx = idx
+        return obj
+
+    def __array_finalize__(self, obj):
+        """Propagate labels from ``obj`` and narrow them to the last index taken."""
+        if obj is None:
+            return
+        self.labels = getattr(obj, 'labels', None)
+        if self.labels is None:
+            return
+        if hasattr(obj, 'idx') and self.ndim >= 1:
+            if obj.idx is None:
+                return
+            # Keep only the label rows selected along the first axis.
+            if isinstance(obj.idx, int):
+                self.labels = self.labels[obj.idx]
+            else:
+                self.labels = self.labels[obj.idx[0]]
+            if isinstance(self.labels, str):
+                return
+            if self.labels.ndim > 1:
+                # Drop the leading label columns that are identical in every
+                # remaining row, but only if those constant columns form a
+                # prefix (True values first, then False).
+                f_leftshift = lambda a1: all(x >= y for x, y in zip(a1, a1[1:]))
+                all_column = np.all(self.labels == self.labels[0, :], axis=0)
+                sl = 0 if not f_leftshift(all_column) else all_column.sum()
+                self.labels = self.labels[:, slice(sl, None)]
+            if self.labels.ndim == 1:
+                # A single remaining label row means the result is unlabeled.
+                self.labels = None
+
+    def __getitem__(self, item):
+        """Index by label(s) or by any ordinary numpy index; the result is squeezed."""
+        if isinstance(item, str):
+            item = self._label2idx(item)
+        if isinstance(item, tuple):
+            if isinstance(item[0], str):
+                item = self._label2idx(item)
+        # Remember the index so __array_finalize__ can narrow the result's labels.
+        self.idx = item
+        ret = super(LabeledArray, self).__getitem__(item)
+        return ret.squeeze()
+
+    def _label2idx(self, item):
+        """Translate a label (or tuple of labels) into a numpy index tuple.
+
+        Returns a one-element tuple for a unique match, full slices when every
+        row matches, and otherwise one slice spanning the first through the
+        last matching row (inclusive) followed by full slices.
+
+        Raises:
+            ValueError: if no label row matches ``item``.
+        """
+        item = item if isinstance(item, tuple) else (item, )
+        boolarr = np.ones(self.labels.shape[0], dtype=bool)
+        for num, it in enumerate(item):
+            boolarr = boolarr * (self.labels[:, num] == it)
+        tidx = np.where(boolarr)[0]
+        if tidx.size == 1:
+            return tuple(tidx)
+        rest = (slice(None, None, None), ) * (self.ndim - 1)
+        if boolarr.all():
+            return (slice(None, None, None), ) + rest
+        if tidx.size == 0:
+            raise ValueError("label {0!r} not found".format(item))
+        first, last = tidx[0], tidx[-1]
+        minidx = first if first > 0 else None
+        # The slice stop is exclusive, so it must point one past the last match.
+        maxidx = last + 1 if last < self.shape[0] - 1 else None
+        return (slice(minidx, maxidx, None), ) + rest
+
+    def vstack(self, larr):
+        """merging first dimension (more labels)
+        """
+        if self.ndim > larr.ndim:
+            larr = np.expand_dims(larr, axis=0)
+        return LabeledArray(np.vstack((self, larr)), np.vstack((self.labels, larr.labels)))
+
+    def hstack(self, larr):
+        """merging second dimension (more cells)
+
+        Returns None when the two arrays do not carry identical labels.
+        """
+        # array_equal also copes with label arrays of different shapes.
+        if np.array_equal(self.labels, larr.labels):
+            return LabeledArray(np.hstack((self, larr)), self.labels)
+        return None
+
+    def save(self, file_name):
+        """Save data, labels and any extra instance attributes to a compressed .npz."""
+        # Extra fields are attributes set on the instance but unknown to the class.
+        extra_fields = set(dir(self)).difference(set(dir(self.__class__)))
+        data = dict(arr=self, labels=self.labels)
+        for ef in extra_fields:
+            data[ef] = getattr(self, ef)
+        np.savez_compressed(file_name, **data)
+
+    @classmethod
+    def load(cls, file_name):
+        """Load an array written by ``save``; ``.npz`` is appended if missing."""
+        if not file_name.endswith('.npz'):
+            file_name = file_name + '.npz'
+        # NOTE: labels are an object array, which numpy stores pickled, so
+        # pickle must be allowed explicitly. Only load files you trust.
+        with np.load(file_name, allow_pickle=True) as f:
+            la = cls(f['arr'], f['labels'])
+            for key in f.files:
+                if key not in ('arr', 'labels'):
+                    setattr(la, key, f[key])
+        return la
+
+
+if __name__ == "__main__":
+    # Self-test, run only when this module is executed directly.
+    # It writes test.npz into the current working directory.
+    # Check 2D.
+    arr = np.random.rand(3, 100)
+    labelarr = np.array([['a1', 'b1', ''], 
+                        ['a1' ,'b2' , 'c1'], 
+                        ['a1' ,'b2' , 'c2']], dtype=object)
+    darr = LabeledArray(arr, labelarr)
+    # stop
+    assert darr['a1'].shape == (3, 100)
+    assert darr['a1', 'b1'].shape == (100, )
+    assert darr['a1', 'b2'].shape == (2, 100)
+    assert darr['a1', 'b2', 'c1'].shape == (100, )
+
+    # check 3D.
+    arr = np.arange(12).reshape((3, 2, 2))
+    labelarr = np.array([['a1' ,'b1', ''], 
+                        ['a1' ,'b2' , 'c1'], 
+                        ['a1' ,'b2' , 'c2']], dtype=object)
+    darr = LabeledArray(arr, labelarr)
+    assert darr['a1'].shape == (3, 2, 2)
+    assert darr['a1', 'b1'].shape == (2, 2)
+    assert darr['a1', 'b2'].shape == (2, 2, 2)
+    assert darr['a1', 'b2', 'c1'].shape == (2, 2)
+    assert darr.shape == (3, 2, 2)
+    assert darr[1:, :, :].shape == (2, 2, 2)
+    assert darr[1, :, :].shape == (2, 2)
+    # Slicing by a label prefix leaves only the remaining label columns.
+    assert np.all(darr['a1', 'b2'].labels == np.array([['c1'], ['c2']]))
+
+    # can save and load extra fields. add "time" for example.
+    darr.time = np.arange(darr.shape[-1])
+    darr.save('test')
+    cc = LabeledArray().load('test.npz')
+    assert cc.time.shape == (2,)
+    cc[0:2, :, :]
+    # Writing through a label-indexed result must modify cc itself.
+    cc['a1', 'b1'][0, 0] = 100
+    assert np.sum(cc == 100) == 1
+
+    # Stacking doubles the first (vstack) or second (hstack) dimension.
+    assert darr.vstack(darr).shape == (2 * darr.shape[0], darr.shape[1], darr.shape[2])
+    assert darr.hstack(darr).shape == (darr.shape[0], 2 * darr.shape[1], darr.shape[2])
+
+
diff --git a/celltk/labeledarray/labeledarray/temp.npz b/celltk/labeledarray/labeledarray/temp.npz
diff --git a/celltk/labeledarray/labeledarray/test.npz b/celltk/labeledarray/labeledarray/test.npz
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from labeledarray.labeledarray import LabeledArray