forked from braysia/CellTK
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
66 changed files
with
1,327 additions
and
134 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +0,0 @@ | ||
[submodule "celltk/labeledarray"] | ||
path = celltk/labeledarray | ||
url = https://github.com/braysia/labeledarray | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule labeledarray
deleted from
39d847
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2017 braysia | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# LabeledArray | ||
|
||
A NumPy array subclass that can be indexed by strings. | ||
|
||
Using a multi-index in pandas sometimes causes complications around "copies vs. views". This array keeps the behavior of numpy.array while still allowing the array to be sliced by strings. | ||
|
||
The underlying data can be a 2D, 3D, or N-dimensional array. The first dimension is used for the labels (multi-index). | ||
|
||
``` | ||
arr = np.zeros((3, 20, 100)) | ||
labels = np.array([['nuc' ,'area', ''], | ||
['nuc' ,'FITC' , 'min_intensity'], | ||
['nuc' ,'FITC' , 'max_intensity']], dtype=object) | ||
larr = LabeledArray(arr, labels) | ||
print larr.shape | ||
print larr['nuc', 'FITC'].shape | ||
print larr['nuc', 'FITC', 'max_intensity'].shape | ||
``` | ||
|
||
Extra attributes, including the labels, are automatically saved and loaded together with the array. | ||
``` | ||
larr = LabeledArray(arr, labels) | ||
larr.time = np.arange(arr.shape[-1]) | ||
larr.save('temp') | ||
new_larr = LabeledArray().load('temp') | ||
print new_larr.time | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from labeledarray.labeledarray import LabeledArray |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
""" | ||
TODO: check that the labels are unique. | ||
""" | ||
import numpy as np | ||
from collections import OrderedDict | ||
from utils import sort_labels_and_arr, uniform_list_length | ||
|
||
|
||
class LabeledArray(np.ndarray):
    """Numpy array whose first axis can be indexed by string labels.

    Each row (first axis) corresponds to one label and each column to a cell.
    The underlying data can be an N-dimensional array; only the first
    dimension is labeled. ``labels`` holds one row of label strings per row
    of data (a multi-level label, one level per column).

    Examples:
        >> arr = np.arange(12).reshape((3, 2, 2))
        >> labelarr = np.array([['a1', 'b1', ''],
                                ['a1', 'b2', 'c1'],
                                ['a1', 'b2', 'c2']], dtype=object)
        >> larr = LabeledArray(arr, labelarr)
        >> larr['a1'].shape
        (3, 2, 2)
        >> larr['a1', 'b1'].shape
        (2, 2)
        >> larr['a1', 'b2', 'c1']
        LabeledArray([[4, 5],
                      [6, 7]])
    """

    # Index of the __getitem__ call currently in progress. It is read by
    # __array_finalize__ to slice ``labels`` the same way as the data, and is
    # None whenever no indexing operation is running.
    idx = None
    # Label rows aligned with the first axis of the data, or None.
    labels = None

    def __new__(cls, arr=None, labels=None, idx=None):
        """Create a labeled view of ``arr``.

        Args:
            arr: array-like data. When None, an empty placeholder instance is
                returned and ``labels``/``idx`` are ignored.
            labels: label rows for the first axis of ``arr``; either an
                ndarray or a nested sequence.
            idx: initial value of the ``idx`` attribute.

        Returns:
            A LabeledArray viewing ``arr``.
        """
        if arr is None:
            return np.asarray(arr).view(cls)
        # NOTE(review): both helpers come from the ``utils`` module, which is
        # not visible here. Presumably they sort the rows by label and pad
        # label rows to a common length -- confirm against utils.
        labels, arr = sort_labels_and_arr(labels, arr)
        if labels is not None and not isinstance(labels, np.ndarray):
            labels = np.array(uniform_list_length(labels), dtype=object)
        obj = np.asarray(arr).view(cls)
        obj.labels = labels
        obj.idx = idx
        return obj

    def __array_finalize__(self, obj):
        """Propagate labels from ``obj`` to a newly created view or copy.

        When ``obj`` is in the middle of an indexing operation (``obj.idx``
        is set), the labels are sliced with the first-axis part of that
        index. Leading label columns shared by every remaining row are then
        dropped, and labels are discarded entirely once a single row is left.
        """
        if obj is None:
            return
        self.labels = getattr(obj, 'labels', None)
        if self.labels is None:
            return
        idx = getattr(obj, 'idx', None)
        if idx is None or self.ndim < 1:
            return

        # Only the first-axis component of the index applies to the labels.
        # A bare int, slice or index array is used directly; taking ``idx[0]``
        # of those would fail or select the wrong label row.
        if isinstance(idx, tuple):
            if not idx:
                return
            row_key = idx[0]
        else:
            row_key = idx

        labels = self.labels[row_key]
        if isinstance(labels, str):
            self.labels = labels
            return

        if labels.ndim > 1 and labels.shape[0] > 0:
            # Columns whose value is identical in every remaining row.
            constant = np.all(labels == labels[0, :], axis=0)
            # Strip them only when they form a prefix (all True flags come
            # before any False flag); otherwise keep every column.
            is_prefix = all(a >= b for a, b in zip(constant, constant[1:]))
            n_shared = constant.sum() if is_prefix else 0
            labels = labels[:, n_shared:]

        # A single selected row has no label table left to index by.
        self.labels = None if labels.ndim == 1 else labels

    def __getitem__(self, item):
        """Index by label strings or by any regular numpy index.

        A string, or a tuple whose first element is a string, is translated
        to a positional index through ``_label2idx``. The result is squeezed.
        """
        is_label_tuple = (isinstance(item, tuple) and len(item) > 0
                          and isinstance(item[0], str))
        if isinstance(item, str) or is_label_tuple:
            item = self._label2idx(item)
        self.idx = item
        try:
            ret = super(LabeledArray, self).__getitem__(item)
        finally:
            # Clear the marker so later views/copies of this array do not
            # re-apply a stale index to their labels.
            self.idx = None
        return ret.squeeze()

    def _label2idx(self, item):
        """Translate a label (or tuple of label levels) to a positional index.

        Args:
            item: a label string, or a tuple with one string per label level.

        Returns:
            A tuple usable as a numpy index: a single row number when exactly
            one row matches, otherwise a first-axis slice followed by full
            slices for the remaining axes.

        Raises:
            ValueError: if no row matches ``item``.
        """
        keys = item if isinstance(item, tuple) else (item, )
        mask = np.ones(self.labels.shape[0], dtype=bool)
        for level, key in enumerate(keys):
            mask = mask & (self.labels[:, level] == key)
        rows = np.where(mask)[0]
        trailing = (slice(None, None, None), ) * (self.ndim - 1)

        if rows.size == 1:
            return tuple(rows)
        if mask.all():
            return (slice(None, None, None), ) + trailing
        if rows.size == 0:
            raise ValueError("no label matches {0!r}".format(keys))
        # A slice (not an index array) keeps the result a view on the data.
        # The stop bound is exclusive, hence the +1 to keep the last match.
        # NOTE(review): assumes matching rows are contiguous, i.e. the labels
        # are sorted -- confirm that sort_labels_and_arr guarantees this.
        return (slice(int(rows[0]), int(rows[-1]) + 1, None), ) + trailing

    def vstack(self, larr):
        """Merge along the first dimension (more labels).

        Args:
            larr: LabeledArray to append; when it has fewer dimensions than
                ``self`` a leading axis is added first.

        Returns:
            A new LabeledArray holding the stacked data and stacked labels.
        """
        if self.ndim > larr.ndim:
            larr = np.expand_dims(larr, axis=0)
        stacked_labels = np.vstack((self.labels, larr.labels))
        return LabeledArray(np.vstack((self, larr)), stacked_labels)

    def hstack(self, larr):
        """Merge along the second dimension (more cells).

        Returns:
            A new LabeledArray when both arrays carry identical labels,
            otherwise None.
        """
        # array_equal also copes with label tables of different shapes,
        # where an elementwise ``==`` does not yield an array.
        if np.array_equal(self.labels, larr.labels):
            return LabeledArray(np.hstack((self, larr)), self.labels)
        return None

    def save(self, file_name):
        """Save the data, the labels and any extra instance attributes.

        Everything is written to one compressed ``.npz`` archive under the
        keys ``arr``, ``labels`` and the name of each extra attribute.
        """
        # Attributes present on the instance but not on the class are the
        # user-added extras (e.g. ``time``).
        extra_fields = set(dir(self)).difference(dir(self.__class__))
        data = dict(arr=self, labels=self.labels)
        for name in extra_fields:
            data[name] = getattr(self, name)
        np.savez_compressed(file_name, **data)

    @classmethod
    def load(cls, file_name):
        """Load an array written by ``save``.

        Args:
            file_name: path of the archive; ``.npz`` is appended if missing.

        Returns:
            The restored array with its labels and extra attributes.
        """
        if not file_name.endswith('.npz'):
            file_name = file_name + '.npz'
        # Labels are stored as an object array, which numpy can only restore
        # through pickle. SECURITY: unpickling can execute arbitrary code,
        # so only load archives from a trusted source.
        with np.load(file_name, allow_pickle=True) as f:
            la = cls(f['arr'], f['labels'])
            for key in f.files:
                if key not in ('arr', 'labels'):
                    setattr(la, key, f[key])
        return la
|
||
|
||
if __name__ == "__main__":
    # Self-test: run this module directly to exercise label indexing,
    # save/load of extra attributes, and stacking.
    import os
    import shutil
    import tempfile

    # Check 2D.
    arr = np.random.rand(3, 100)
    labelarr = np.array([['a1', 'b1', ''],
                         ['a1', 'b2', 'c1'],
                         ['a1', 'b2', 'c2']], dtype=object)
    darr = LabeledArray(arr, labelarr)
    assert darr['a1'].shape == (3, 100)
    assert darr['a1', 'b1'].shape == (100, )
    assert darr['a1', 'b2'].shape == (2, 100)
    assert darr['a1', 'b2', 'c1'].shape == (100, )

    # Check 3D.
    arr = np.arange(12).reshape((3, 2, 2))
    labelarr = np.array([['a1', 'b1', ''],
                         ['a1', 'b2', 'c1'],
                         ['a1', 'b2', 'c2']], dtype=object)
    darr = LabeledArray(arr, labelarr)
    assert darr['a1'].shape == (3, 2, 2)
    assert darr['a1', 'b1'].shape == (2, 2)
    assert darr['a1', 'b2'].shape == (2, 2, 2)
    assert darr['a1', 'b2', 'c1'].shape == (2, 2)
    assert darr.shape == (3, 2, 2)
    assert darr[1:, :, :].shape == (2, 2, 2)
    assert darr[1, :, :].shape == (2, 2)
    assert np.all(darr['a1', 'b2'].labels == np.array([['c1'], ['c2']]))

    # Extra fields can be saved and loaded; add "time" as an example.
    # The archive goes to a temporary directory so that running the
    # self-test leaves no file behind in the working directory.
    darr.time = np.arange(darr.shape[-1])
    tmpdir = tempfile.mkdtemp()
    try:
        path = os.path.join(tmpdir, 'test')
        darr.save(path)
        cc = LabeledArray.load(path + '.npz')
    finally:
        shutil.rmtree(tmpdir)
    assert cc.time.shape == (2,)
    cc[0:2, :, :]
    # Label indexing returns a view, so writing through it changes ``cc``.
    cc['a1', 'b1'][0, 0] = 100
    assert np.sum(cc == 100) == 1

    assert darr.vstack(darr).shape == (2 * darr.shape[0], darr.shape[1], darr.shape[2])
    assert darr.hstack(darr).shape == (darr.shape[0], 2 * darr.shape[1], darr.shape[2])
|
||
|
Binary file not shown.
Binary file not shown.
Oops, something went wrong.