From faf7467ce9000f2435a7ee8efab05a5c51beb184 Mon Sep 17 00:00:00 2001
From: Matt Jones <mattjones315@gmail.com>
Date: Thu, 7 Mar 2024 13:44:25 -0800
Subject: [PATCH 1/3] Fix bug in LCA reconstruction when all states are ambiguous

---
 cassiopeia/data/utilities.py           |  5 +++++
 test/data_tests/data_utilities_test.py | 11 +++++++++++
 2 files changed, 16 insertions(+)

diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py
index bcf008f4..0a265e7b 100755
--- a/cassiopeia/data/utilities.py
+++ b/cassiopeia/data/utilities.py
@@ -72,6 +72,7 @@ def get_lca_characters(
                 else:
                     lca_vec[i] = all_states[0]
             else:
+                all_ambiguous = np.all([is_ambiguous_state(s) for s in all_states])
                 chars = set.intersection(
                     *map(
                         set,
@@ -83,6 +84,10 @@ def get_lca_characters(
                 )
                 if len(chars) == 1:
                     lca_vec[i] = list(chars)[0]
+                if all_ambiguous:
+                    # if every non-missing state is ambiguous, the LCA state
+                    # is the intersection of those states, kept as a tuple.
+                    lca_vec[i] = tuple(chars)
     return lca_vec
 
 
diff --git a/test/data_tests/data_utilities_test.py b/test/data_tests/data_utilities_test.py
index 94902faa..67eb2fd7 100755
--- a/test/data_tests/data_utilities_test.py
+++ b/test/data_tests/data_utilities_test.py
@@ -316,6 +316,17 @@ def test_lca_characters_ambiguous(self):
         )
         self.assertEqual(ret_vec, [1, 2, 3, 0, 5])
 
+    def test_lca_characters_ambiguous2(self):
+        
+        s1 = [(4, 62), (3, 10), (3, 10, 16), (0, 3), (0, 2, 3), (0, 2, 3), (0, 4, 7), (0, 2, 23), (0, 1, 4, 44)]
+        s2 = [4, 3, -1, 0, 0, 0, (0, 7), (0, 2), (0, 4)]
+
+        expected_reconstruction = [4, 3, (3, 10, 16), 0, 0, 0, (0, 7), (0, 2), (0, 4)]
+        ret_vec = data_utilities.get_lca_characters(
+            [s1, s2], missing_state_indicator=-1
+        )
+        self.assertEqual(ret_vec, expected_reconstruction)
+
     def test_lca_characters_ambiguous_and_missing(self):
         vecs = [
             [(1, 1), (0, 2), (3, 0), (4,), (5,)],

From b5b84cfc33985afcb1adcc1038f9393d6d1e3a49 Mon Sep 17 00:00:00 2001
From: Matt Jones <mattjones315@gmail.com>
Date: Thu, 7 Mar 2024 13:45:47 -0800
Subject: [PATCH 2/3] reformatted cassiopeia/data/utilities.py

---
 cassiopeia/data/utilities.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py
index 0a265e7b..c2d4d970 100755
--- a/cassiopeia/data/utilities.py
+++ b/cassiopeia/data/utilities.py
@@ -1,6 +1,7 @@
 """
 General utilities for the datasets encountered in Cassiopeia.
 """
+
 import collections
 from joblib import delayed
 import multiprocessing
@@ -61,7 +62,7 @@ def get_lca_characters(
             all_states = [
                 vec[i] for vec in vecs if vec[i] != missing_state_indicator
             ]
-            
+
             # this check is specifically if all_states consists of a single
             # ambiguous state.
             if len(list(set(all_states))) == 1:
@@ -72,7 +73,9 @@ def get_lca_characters(
                 else:
                     lca_vec[i] = all_states[0]
             else:
-                all_ambiguous = np.all([is_ambiguous_state(s) for s in all_states])
+                all_ambiguous = np.all(
+                    [is_ambiguous_state(s) for s in all_states]
+                )
                 chars = set.intersection(
                     *map(
                         set,
@@ -233,9 +236,7 @@ def compute_dissimilarity_map(
         ]
 
         # load character matrix into shared memory
-        shm = shared_memory.SharedMemory(
-            create=True, size=cm.nbytes
-        )
+        shm = shared_memory.SharedMemory(create=True, size=cm.nbytes)
         shared_cm = np.ndarray(cm.shape, dtype=cm.dtype, buffer=shm.buf)
         shared_cm[:] = cm[:]
 

From dced9533d0a796bd9d4c65a8c44ecc726ec6dd65 Mon Sep 17 00:00:00 2001
From: Matt Jones <mattjones315@gmail.com>
Date: Thu, 7 Mar 2024 13:45:56 -0800
Subject: [PATCH 3/3] reformatted test/data_tests/data_utilities_test.py

---
 test/data_tests/data_utilities_test.py | 48 +++++++++++++++++++-------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/test/data_tests/data_utilities_test.py b/test/data_tests/data_utilities_test.py
index 67eb2fd7..2d40d867 100755
--- a/test/data_tests/data_utilities_test.py
+++ b/test/data_tests/data_utilities_test.py
@@ -85,7 +85,7 @@ def test_bootstrap_character_matrices_no_priors(self):
 
         self.assertEqual(len(bootstrap_samples), 10)
 
-        for (bootstrap_matrix, bootstrap_priors) in bootstrap_samples:
+        for bootstrap_matrix, bootstrap_priors in bootstrap_samples:
             self.assertCountEqual(
                 self.character_matrix.index, bootstrap_matrix.index
             )
@@ -113,7 +113,7 @@ def test_bootstrap_character_matrices_with_priors(self):
 
         self.assertEqual(len(bootstrap_samples), 10)
 
-        for (bootstrap_matrix, bootstrap_priors) in bootstrap_samples:
+        for bootstrap_matrix, bootstrap_priors in bootstrap_samples:
             self.assertCountEqual(
                 self.character_matrix.index, bootstrap_matrix.index
             )
@@ -317,11 +317,31 @@ def test_lca_characters_ambiguous(self):
         self.assertEqual(ret_vec, [1, 2, 3, 0, 5])
 
     def test_lca_characters_ambiguous2(self):
-        
-        s1 = [(4, 62), (3, 10), (3, 10, 16), (0, 3), (0, 2, 3), (0, 2, 3), (0, 4, 7), (0, 2, 23), (0, 1, 4, 44)]
+
+        s1 = [
+            (4, 62),
+            (3, 10),
+            (3, 10, 16),
+            (0, 3),
+            (0, 2, 3),
+            (0, 2, 3),
+            (0, 4, 7),
+            (0, 2, 23),
+            (0, 1, 4, 44),
+        ]
         s2 = [4, 3, -1, 0, 0, 0, (0, 7), (0, 2), (0, 4)]
 
-        expected_reconstruction = [4, 3, (3, 10, 16), 0, 0, 0, (0, 7), (0, 2), (0, 4)]
+        expected_reconstruction = [
+            4,
+            3,
+            (3, 10, 16),
+            0,
+            0,
+            0,
+            (0, 7),
+            (0, 2),
+            (0, 4),
+        ]
         ret_vec = data_utilities.get_lca_characters(
             [s1, s2], missing_state_indicator=-1
         )
@@ -336,7 +356,7 @@ def test_lca_characters_ambiguous_and_missing(self):
         ret_vec = data_utilities.get_lca_characters(
             vecs, missing_state_indicator=-1
         )
-        self.assertEqual(ret_vec, [1, (0,2), (3,0), 0, 5])
+        self.assertEqual(ret_vec, [1, (0, 2), (3, 0), 0, 5])
 
     def test_resolve_most_abundant(self):
         state = (1, 2, 3, 3)
@@ -463,8 +483,10 @@ def test_inter_cluster_distance_basic(self):
 
         tree = CassiopeiaTree(tree=tree, cell_meta=meta_data)
 
-        inter_cluster_distances = data_utilities.compute_inter_cluster_distances(
-            tree, meta_item="CellType"
+        inter_cluster_distances = (
+            data_utilities.compute_inter_cluster_distances(
+                tree, meta_item="CellType"
+            )
         )
 
         expected_distances = pd.DataFrame.from_dict(
@@ -518,10 +540,12 @@ def test_inter_cluster_distance_custom_input(self):
 
         tree = CassiopeiaTree(tree=tree)
 
-        inter_cluster_distances = data_utilities.compute_inter_cluster_distances(
-            tree,
-            meta_data=meta_data["CellType"],
-            dissimilarity_map=weight_matrix,
+        inter_cluster_distances = (
+            data_utilities.compute_inter_cluster_distances(
+                tree,
+                meta_data=meta_data["CellType"],
+                dissimilarity_map=weight_matrix,
+            )
         )
 
         expected_distances = pd.DataFrame.from_dict(