Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ecDNA simulator #240

Merged
merged 42 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
94eb703
inited ecdna birth death simulator.py
Jul 18, 2022
d08ecb7
updated BirthDeathFitnessSimulator for more flexibility
jpdsilva Jul 27, 2022
cc4a99e
ecDNABirthDeathSimulator is broken / in process of
jpdsilva Aug 12, 2022
e1af41e
i don't think ecDNABirthDeathSimulator compiles, trying to add random…
jpdsilva Aug 12, 2022
38989ee
added ecdna simulator first pass, some tests started
Aug 12, 2022
3c2cc6b
added ecdna simulator first pass, some tests started
Aug 12, 2022
6feeeb5
debugging ecDNABirthDeathSimulator.py
jpdsilva Aug 22, 2022
8cec035
continuing to debug / clean up ecDNABirthDeathSimulator.py, working o…
jpdsilva Aug 22, 2022
cfbf01e
Delete ecDNABirthDeathSimulator_v2.py
jpdsilva Aug 22, 2022
d4f79ba
added unit tests for ecDNABirthDeathSimulator.py
jpdsilva Aug 24, 2022
372e37f
Merge branch 'ecdna-simulator' of https://github.com/YosefLab/Cassiop…
jpdsilva Aug 24, 2022
6bf1d3f
added CN-depdt fitness with epistasis
jpdsilva Aug 26, 2022
e48163b
small notes indicating where we want to make changes
Aug 26, 2022
87638d1
implemented populate tree from simulation
Aug 31, 2022
1689f4a
wrote more tests
Aug 31, 2022
3f03685
adding metadata to tree and cleaning up ecDNA_sim_test
jpdsilva Sep 1, 2022
d297d09
edits to notebook
jpdsilva Sep 1, 2022
45dc120
Merge branch 'ecdna-simulator' of https://github.com/YosefLab/Cassiop…
jpdsilva Sep 1, 2022
f6d10be
added functionality for simulating cooperativity
Sep 5, 2022
bcd2106
updated simulation for cooperativity
Sep 5, 2022
b7ae314
bug fix in cooperativity sim
Sep 7, 2022
e9b5e5e
added tests for cosegregation
Sep 7, 2022
1c7e532
added binomial noise model
Sep 30, 2022
a2c1193
sped up simulator
Dec 1, 2022
5ceb883
updates for ecDNA drug perturbation simulation
Apr 12, 2023
73f694e
cleanup
mattjones315 Apr 12, 2024
ad17a6b
Merge branch 'master' of github.com:YosefLab/Cassiopeia into ecdna-si…
mattjones315 Apr 12, 2024
f32bfe0
updated format
mattjones315 Apr 12, 2024
a476f12
fixed tests
mattjones315 Apr 13, 2024
7d63c8e
reverted back to PriorityQueue
mattjones315 Apr 13, 2024
8bbcd10
moved ecDNA simulator back to PriorityQueue
mattjones315 Apr 13, 2024
30b4552
rebuilding docs
mattjones315 Apr 14, 2024
189538e
added ecDNA simulator to user guide
mattjones315 Apr 14, 2024
bffa6c0
added ecDNA simulator to user guide
mattjones315 Apr 14, 2024
4ef1270
added ecDNA notebook
mattjones315 Apr 14, 2024
b6cb41c
added ecDNA notebook
mattjones315 Apr 14, 2024
10621c0
updated notebooks
mattjones315 Apr 15, 2024
77fc89b
updated notebooks
mattjones315 Apr 15, 2024
283cdc7
updated formatting issues and docs in birthdeathfitnessimulator
mattjones315 May 8, 2024
a351abd
covered edge case where no birth scale is in initial tree
mattjones315 May 8, 2024
83009eb
updated docs
mattjones315 May 8, 2024
fca94b2
removed old unused code
mattjones315 May 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cassiopeia/mixins/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class DistanceSolverError(Exception):

pass

class ecDNABirthDeathSimulatorError(Exception):
"""An ExceptionClass for ecDNABirthDeathSimulator class."""

pass

class FitchCountError(Exception):
"""An ExceptionClass for FitchCount."""
Expand Down
1 change: 1 addition & 0 deletions cassiopeia/plotting/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def create_indel_heatmap(
indel_colors: Optional[pd.DataFrame] = None,
indel_priors: Optional[pd.DataFrame] = None,
random_state: Optional[np.random.RandomState] = None,
clustered_linprof: Optional[pd.DataFrame] = None,
mattjones315 marked this conversation as resolved.
Show resolved Hide resolved
) -> Tuple[
List[
Dict[
Expand Down
184 changes: 143 additions & 41 deletions cassiopeia/simulator/BirthDeathFitnessSimulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class BirthDeathFitnessSimulator(TreeSimulator):
collapse_unifurcations: Specifies whether to collapse unifurcations in
the tree resulting from pruning dead lineages
random_seed: A seed for reproducibility
initial_tree: A tree used for initializing the simulation.
mattjones315 marked this conversation as resolved.
Show resolved Hide resolved

Raises:
TreeSimulatorError if invalid stopping conditions are provided or if a
Expand All @@ -114,6 +115,7 @@ def __init__(
experiment_time: Optional[float] = None,
collapse_unifurcations: bool = True,
random_seed: int = None,
initial_tree: Optional[CassiopeiaTree] = None,
):
if num_extant is None and experiment_time is None:
raise TreeSimulatorError(
Expand Down Expand Up @@ -147,6 +149,85 @@ def __init__(
self.collapse_unifurcations = collapse_unifurcations
self.random_seed = random_seed

# useful for resuming a simulation, perhaps under different pressures.
self.initial_tree = initial_tree

def initialize_tree(self, names) -> nx.DiGraph:
"""initializes a tree (nx.DiGraph() object with one node). Auxiliary data for each node is grabbed from self (initial conditions / params) or hardcoded
mattjones315 marked this conversation as resolved.
Show resolved Hide resolved

Args: names is a generator (function object that stores internal state) that will be used to generate names for the tree nodes
Output: tree (DiGraph object with one node, the root)
root (name of root node in tree)
"""
if self.initial_tree:
tree = self.initial_tree.get_tree_topology()
for node in self.initial_tree.nodes:
tree.nodes[node]['birth_scale'] = self.initial_tree.get_attribute(node, 'birth_scale')
mattjones315 marked this conversation as resolved.
Show resolved Hide resolved
tree.nodes[node]['time'] = self.initial_tree.get_attribute(node, 'time')
return tree

tree = nx.DiGraph()
root = next(names)
tree.add_node(root)
tree.nodes[root]["birth_scale"] = self.initial_birth_scale
tree.nodes[root]["time"] = 0

return tree

def make_initial_lineage_dict(self, tree: nx.DiGraph):
"""
uses self initial-conditions and hardcoded default parameters to create an initial lineage dict
Args: id_value: name of new lineage

Output: a lineage dict
"""

leaves = [node for node in tree if tree.out_degree(node) == 0]
current_lineages = PriorityQueue()
for leaf in leaves:

lineage_dict = self.make_lineage_dict(
leaf, tree.nodes[leaf]['birth_scale'], tree.nodes[leaf]['time'], True
)

if len(tree.nodes) == 1:
return lineage_dict

current_lineages.put(
(
tree.nodes[leaf]['time'],
leaf,
lineage_dict
)
)

return current_lineages

def make_lineage_dict(
self,
id_value,
birth_scale,
total_time,
active_flag,
):
"""makes a dict (lineage) from the given parameters. keys are hardcoded.

Args: id_value: id of new lineage
birth_scale: birth_scale parameter of new lineage
total_time: age of lineage
active_flag: bool to indicate whether lineage is active

Returns: a dict (lineage) with the parameter values under the hard-coded keys

"""
lineage_dict = {
"id": id_value,
"birth_scale": birth_scale,
"total_time": total_time,
"active": active_flag,
}
return lineage_dict

def simulate_tree(
self,
) -> CassiopeiaTree:
Expand All @@ -169,39 +250,39 @@ def simulate_tree(
TreeSimulatorError if all lineages die before a stopping condition
"""

def node_name_generator() -> Generator[str, None, None]:
def node_name_generator(start = 0) -> Generator[str, None, None]:
"""Generates unique node names for the tree."""
i = 0
i = start
while True:
yield str(i)
i += 1

names = node_name_generator()
starting_index = 0
if self.initial_tree:
starting_index = (np.max([int(l) for l in self.initial_tree.leaves]) + 1)
names = node_name_generator(starting_index)

# Set the seed
if self.random_seed:
np.random.seed(self.random_seed)

tree = self.initialize_tree(names)

# Instantiate the implicit root
tree = nx.DiGraph()
root = next(names)
tree.add_node(root)
tree.nodes[root]["birth_scale"] = self.initial_birth_scale
tree.nodes[root]["time"] = 0
current_lineages = PriorityQueue()
current_lineages = PriorityQueue() # instantiate queue
# Records the nodes that are observed at the end of the experiment

# TO DO: update to accept arbitrary fields in the dict.
observed_nodes = []
starting_lineage = {
"id": root,
"birth_scale": self.initial_birth_scale,
"total_time": 0,
"active": True,
}

# Sample the waiting time until the first division
self.sample_lineage_event(
starting_lineage, current_lineages, tree, names, observed_nodes
)
starting_lineage = self.make_initial_lineage_dict(tree)

if len(tree.nodes) == 1:
# Sample the waiting time until the first division
self.sample_lineage_event(
starting_lineage, current_lineages, tree, names, observed_nodes
)
else:
current_lineages = starting_lineage

# Perform the process until there are no active extant lineages left
while not current_lineages.empty():
Expand All @@ -219,6 +300,7 @@ def node_name_generator() -> Generator[str, None, None]:
min_total_time = remaining_lineages[0]["total_time"]
for lineage in remaining_lineages:
parent = list(tree.predecessors(lineage["id"]))[0]

tree.nodes[lineage["id"]]["time"] += (
min_total_time - lineage["total_time"]
)
Expand All @@ -229,31 +311,18 @@ def node_name_generator() -> Generator[str, None, None]:
break
# Pop the minimum age lineage to simulate forward time
_, _, lineage = current_lineages.get()

# If the lineage is no longer active, just remove it from the queue.
# This represents the time at which the lineage dies.
if lineage["active"]:
for _ in range(2):
for i in range(2):
self.sample_lineage_event(
lineage, current_lineages, tree, names, observed_nodes
)

cassiopeia_tree = CassiopeiaTree(tree=tree)
time_dictionary = {}
for i in tree.nodes:
time_dictionary[i] = tree.nodes[i]["time"]
cassiopeia_tree.set_times(time_dictionary)

# Prune dead lineages and collapse resulting unifurcations
to_remove = list(set(cassiopeia_tree.leaves) - set(observed_nodes))
cassiopeia_tree.remove_leaves_and_prune_lineages(to_remove)
if self.collapse_unifurcations and len(cassiopeia_tree.nodes) > 1:
cassiopeia_tree.collapse_unifurcations(source="1")

# If only implicit root remains after pruning dead lineages, error
if len(cassiopeia_tree.nodes) == 1:
raise TreeSimulatorError(
"All lineages died before stopping condition"
)
cassiopeia_tree = self.populate_tree_from_simulation(
tree, observed_nodes
)

return cassiopeia_tree

Expand All @@ -266,7 +335,6 @@ def sample_lineage_event(
observed_nodes: List[str],
) -> None:
"""A helper function that samples an event for a lineage.

Takes a lineage and determines the next event in that lineage's
future. Simulates the lifespan of a new descendant. Birth and
death waiting times are sampled, representing how long the
Expand All @@ -285,7 +353,6 @@ def sample_lineage_event(
the lifespan is cut off at the experiment time and a final observed
sample is added to the tree. In this case the lineage is marked as
inactive as well.

Args:
unique_id: The unique ID number to be used to name a new node
added to the tree
Expand All @@ -300,7 +367,6 @@ def sample_lineage_event(
names: A generator providing unique names for tree nodes
observed_nodes: A list of nodes that are observed at the end of
the experiment

Raises:
TreeSimulatorError if a negative waiting time is sampled or a
non-active lineage is passed in
Expand Down Expand Up @@ -378,6 +444,7 @@ def sample_lineage_event(
)
)


else:
tree.add_node(unique_id)
tree.nodes[unique_id]["birth_scale"] = lineage["birth_scale"]
Expand Down Expand Up @@ -435,3 +502,38 @@ def update_fitness(self, birth_scale: float) -> float:
self.fitness_base ** self.fitness_distribution()
)
return birth_scale * base_selection_coefficient

def populate_tree_from_simulation(
self, tree: nx.DiGraph, observed_nodes: List[str]
) -> CassiopeiaTree:
"""Populates tree with appropriate meta data.

Args:
tree: The tree simulated with ecDNA and fitness values populated as attributes.
observed_nodes: The observed leaves of the tree.

Returns:
A CassiopeiaTree with relevant node attributes filled in.
"""

cassiopeia_tree = CassiopeiaTree(tree=tree)

time_dictionary = {}
for i in tree.nodes:
time_dictionary[i] = tree.nodes[i]["time"]
cassiopeia_tree.set_attribute(i, 'birth_scale', tree.nodes[i]['birth_scale'])
cassiopeia_tree.set_times(time_dictionary)

# Prune dead lineages and collapse resulting unifurcations
to_remove = list(set(cassiopeia_tree.leaves) - set(observed_nodes))
cassiopeia_tree.remove_leaves_and_prune_lineages(to_remove)
if self.collapse_unifurcations and len(cassiopeia_tree.nodes) > 1:
cassiopeia_tree.collapse_unifurcations(source="1")

# If only implicit root remains after pruning dead lineages, error
if len(cassiopeia_tree.nodes) == 1:
raise TreeSimulatorError(
"All lineages died before stopping condition"
)

return cassiopeia_tree
1 change: 1 addition & 0 deletions cassiopeia/simulator/UniformLeafSubsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,5 @@ def subsample_leaves(
[(node, tree.get_time(node)) for node in subsampled_tree.nodes]
)
)

return subsampled_tree
2 changes: 2 additions & 0 deletions cassiopeia/simulator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .ClonalSpatialDataSimulator import ClonalSpatialDataSimulator
from .CompleteBinarySimulator import CompleteBinarySimulator
from .DataSimulator import DataSimulator
from .ecDNABirthDeathSimulator import ecDNABirthDeathSimulator
from .LeafSubsampler import LeafSubsampler
from .LineageTracingDataSimulator import LineageTracingDataSimulator
from .SimpleFitSubcloneSimulator import SimpleFitSubcloneSimulator
Expand All @@ -25,6 +26,7 @@
"SeqeuntialLineageTracingDataSimulator",
"CompleteBinarySimulator",
"DataSimulator",
"ecDNABirthDeathSimulator",
"LeafSubsampler",
"LineageTracingDataSimulator",
"SimpleFitSubcloneSimulator",
Expand Down
Loading
Loading