Skip to content

Commit

Permalink
Make a minimal functional API
Browse files Browse the repository at this point in the history
  • Loading branch information
currocam committed Mar 20, 2024
1 parent 62a13b2 commit 2f5ab03
Show file tree
Hide file tree
Showing 12 changed files with 62 additions and 54 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ path = "src/lib.rs"
name = "speedytree"
path = "src/bin.rs"
[dependencies]
anyhow = "1.0.81"
bit-set = "0.5.3"
bit-vec = "0.6.3"
bitvec = "1.0.1"
Expand Down
7 changes: 4 additions & 3 deletions src/bin.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
extern crate speedytree;
use clap::Parser;
/// # speedytree
/// `speedytree` is a command line tool for quickly building Neighbor-Joining trees from a PHYLIP distance matrix.
Expand All @@ -9,7 +10,7 @@ use hybrid_nj::neighbor_joining;
use speedytree::hybrid_nj;

use speedytree::distances::DistanceMatrix;
use speedytree::naive_nj::naive_neighbor_joining;
use speedytree::naive_nj::canonical_neighbor_joining;
use speedytree::newick::to_newick;
use speedytree::rapid_nj::rapid_nj;
use std::{
Expand Down Expand Up @@ -117,13 +118,13 @@ pub fn run(config: Config) {
.unwrap();

let reader = io::stdin().lock();
let d = DistanceMatrix::build_from_phylip(reader).unwrap_or_else(|err| {
let d = DistanceMatrix::read_from_phylip(reader).unwrap_or_else(|err| {
eprintln!("{err}");
process::exit(1);
});

let d = match config.algo {
Algorithm::Naive => naive_neighbor_joining(d),
Algorithm::Naive => canonical_neighbor_joining(d),
Algorithm::RapidNJ => rapid_nj(d, config.chunk_size),
Algorithm::Hybrid => {
let naive_steps = d.size() * config.naive_percentage / 100;
Expand Down
8 changes: 4 additions & 4 deletions src/distances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub struct DistanceMatrix {

/// Distance matrix from a phylip file
impl DistanceMatrix {
pub fn build_from_phylip<R>(mut reader: R) -> ResultBox<DistanceMatrix>
pub fn read_from_phylip<R>(mut reader: R) -> ResultBox<DistanceMatrix>
where
R: io::BufRead,
{
Expand Down Expand Up @@ -41,7 +41,7 @@ impl DistanceMatrix {
self.matrix.len()
}
/// Permutate the distance matrix for testing purposes
pub fn permutate(&mut self) {
pub(crate) fn permutate(&mut self) {

Check warning on line 44 in src/distances.rs

View workflow job for this annotation

GitHub Actions / Rust project

associated items `permutate` and `wikipedia_example` are never used
let mut rng = rand::thread_rng();
let mut perm = (0..self.size()).collect::<Vec<usize>>();
perm.shuffle(&mut rng);
Expand All @@ -59,7 +59,7 @@ impl DistanceMatrix {
self.names = new_names;
}
/// Example from Wikipedia, https://en.wikipedia.org/wiki/Neighbor_joining
pub fn wikipedia_example() -> DistanceMatrix {
pub(crate) fn wikipedia_example() -> DistanceMatrix {
DistanceMatrix {
matrix: vec![
vec![0.0, 5.0, 9.0, 9.0, 8.0],
Expand Down Expand Up @@ -95,7 +95,7 @@ D 9.0 10.0 8.0 0.0
"
.as_bytes();
// run function
let distance_matrix = DistanceMatrix::build_from_phylip::<&[u8]>(input).unwrap();
let distance_matrix = DistanceMatrix::read_from_phylip::<&[u8]>(input).unwrap();
// check result
assert_eq!(
distance_matrix.matrix,
Expand Down
5 changes: 2 additions & 3 deletions src/hybrid_nj/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::{
distances::DistanceMatrix, naive_nj::DataNaiveNJ, rapid_nj::DataRapidNJ, ResultBox, Tree,
};

/// Main function of the crate
/// This approach is a hybrid between the naive neighbor joining and the rapid neighbor joining.
/// If `naive_iters` is greater than or equal to n, or n is less than 4, then this function calls `canonical_neighbor_joining` instead.
/// If `naive_iters` is less than 4, then this function calls `rapid_nj` instead.
Expand All @@ -19,10 +18,10 @@ pub fn neighbor_joining(
chunk_size: usize,
) -> ResultBox<Tree> {
if dist.size() < 4 || naive_iters >= dist.size() {
return crate::naive_neighbor_joining(dist);
return crate::naive_nj::canonical_neighbor_joining(dist);
}
if naive_iters < 4 {
return crate::rapid_nj(dist, chunk_size);
return crate::rapid_nj::rapid_nj(dist, chunk_size);
}
let mut q = crate::rapid_nj::QMatrix::from(&dist);
let mut t = crate::rapid_nj::PhyloTree::build(&dist.names);
Expand Down
56 changes: 28 additions & 28 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
//! speedytree: Command line tool for Neighbor Joining of biological sequences
//!
//! It implements different heuristics for fast Neighbor-Joining.
//!
//! 1. Naive Neighbor-Joining
//! 2. RapidNJ
//! 3. Hybrid
//! Canonical and RapidNJ implementations of Neighbor-joining in Rust
//!
//! Provides a Rust implementation of the canonical algorithm and a variant in the spirit of RapidNJ that uses B-trees. Work in progress.
//! A minimal example is provided here.
//! ```
//! use speedytree::distances::DistanceMatrix;
//! use speedytree::canonical_neighbor_joining;
//! use speedytree::rapid_nj_neighbor_joining;
//! use speedytree::robinson_foulds;
//!// Raw Phylip format
//!let input = "5
//! a 0 5 9 9 8
//! b 5 0 10 10 9
//! c 9 10 0 8 7
//! d 9 10 8 0 3
//! e 8 9 7 3 0
//!".as_bytes();
//!let distances = DistanceMatrix::read_from_phylip(input).unwrap();
//! // Use canonical algorithm
//!let tree1 = canonical_neighbor_joining(distances.clone()).unwrap();
//! // Use RapidNJ with B-trees
//!let tree2 = rapid_nj_neighbor_joining(distances.clone(), 2).unwrap();
//! assert_eq!(robinson_foulds(tree1, tree2, 5), 0);
//! ```
#![warn(missing_docs)]
/// Hybrid neighbor joining algorithm
/// This approach is a hybrid between the naive neighbor joining and the rapid neighbor joining.
/// The idea is to use the rapidnj heuristic first to potentially stop the algorithm early,
/// and then use the naive neighbor joining to finish the algorithm, which is faster
/// in practice, but performs more comparisons in theory.
/// However, both algorithms are O(n^3), so the difference is not that big.
pub mod hybrid_nj;
/// Property tests for neighbor joining algorithm
pub mod property_tests;

/// Configuration of the program
pub mod configuration;
pub mod distances;
pub mod naive_nj;
pub mod newick;
pub mod rapid_nj;
use hybrid_nj::neighbor_joining;

use crate::distances::DistanceMatrix;
use crate::naive_nj::naive_neighbor_joining;
use crate::newick::to_newick;
use crate::rapid_nj::rapid_nj;
use std::{
error,
io::{self, Write},
process,
};
use std::error;

type ResultBox<T> = std::result::Result<T, Box<dyn error::Error>>;
type Tree = petgraph::graph::UnGraph<String, f64>;

pub use naive_nj::canonical_neighbor_joining as canonical_neighbor_joining;
pub use rapid_nj::rapid_nj as rapid_nj_neighbor_joining;
pub use property_tests::tree_distances::robinson_foulds as robinson_foulds;
pub use property_tests::tree_distances::branch_score as branch_score;
6 changes: 3 additions & 3 deletions src/naive_nj/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{distances::DistanceMatrix, ResultBox, Tree};

use super::{phylo_tree::PhyloTree, qmatrix::QMatrix};

pub fn naive_neighbor_joining(dist: DistanceMatrix) -> ResultBox<Tree> {
pub fn canonical_neighbor_joining(dist: DistanceMatrix) -> ResultBox<Tree> {
let mut t = PhyloTree::build(&dist.names);
let mut q = QMatrix::build(dist);
while q.n_leaves() > 3 {
Expand All @@ -16,7 +16,7 @@ pub fn naive_neighbor_joining(dist: DistanceMatrix) -> ResultBox<Tree> {
Ok(terminate_nj(t, q))
}

pub fn terminate_nj(tree: PhyloTree, q: QMatrix) -> Tree {
pub(crate) fn terminate_nj(tree: PhyloTree, q: QMatrix) -> Tree {
let (i, j, m) = (tree.nodes[&0], tree.nodes[&1], tree.nodes[&2]);
let mut tree = tree.tree;

Expand Down Expand Up @@ -54,7 +54,7 @@ mod tests {
],
};

let phylo = naive_neighbor_joining(d);
let phylo = canonical_neighbor_joining(d);
assert!(phylo.is_ok());

let tree = phylo.unwrap();
Expand Down
10 changes: 5 additions & 5 deletions src/naive_nj/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ mod qmatrix;
// PhyloTree is a helper struct for the Naive Neighbor Joining algorithm.
mod phylo_tree;
// Export the public interface of the Naive Neighbor Joining algorithm.
pub use algorithm::naive_neighbor_joining;
pub use algorithm::terminate_nj;
pub use phylo_tree::PhyloTree;
pub use qmatrix::QMatrix;
pub struct DataNaiveNJ {
pub use algorithm::canonical_neighbor_joining;
pub(crate) use algorithm::terminate_nj;
pub(crate) use phylo_tree::PhyloTree;
pub(crate) use qmatrix::QMatrix;
pub(crate) struct DataNaiveNJ {
pub qmatrix: qmatrix::QMatrix,
pub phylo_tree: phylo_tree::PhyloTree,
}
2 changes: 1 addition & 1 deletion src/naive_nj/phylo_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ impl PhyloTree {
nodes,
}
}
pub fn build(leafs: &Vec<String>) -> PhyloTree {
pub fn build(leafs: &[String]) -> PhyloTree {
let mut tree: petgraph::Graph<String, f64, petgraph::Undirected> =
UnGraph::new_undirected();
let mut nodes = HashMap::new();
Expand Down
4 changes: 2 additions & 2 deletions src/property_tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ fn assert_equal_tree(a: &crate::Tree, b: &crate::Tree, i: usize) {
#[test]
fn test_random_additive_binary_trees_naive() {
use crate::{
naive_nj::naive_neighbor_joining,
naive_nj::canonical_neighbor_joining,
property_tests::random_additive_tree::{
distance_matrix_from_tree, random_unrooted_binary_tree,
},
};
for i in 4..20 {
let original_tree = random_unrooted_binary_tree(i);
let d = distance_matrix_from_tree(original_tree.clone());
let tree = naive_neighbor_joining(d).unwrap();
let tree = canonical_neighbor_joining(d).unwrap();
assert_equal_tree(&original_tree, &tree, i)
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/rapid_nj/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ mod node;
mod phylo_tree;
mod qmatrix;
pub use algorithm::rapid_nj;
pub use phylo_tree::PhyloTree;
pub use qmatrix::QMatrix;
pub(crate) use phylo_tree::PhyloTree;
pub(crate) use qmatrix::QMatrix;

pub struct DataRapidNJ {
pub(crate) struct DataRapidNJ {
pub qmatrix: QMatrix,
pub phylo_tree: phylo_tree::PhyloTree,
}
Expand Down
4 changes: 2 additions & 2 deletions src/rapid_nj/phylo_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ use std::collections::HashMap;
use petgraph::{graph::UnGraph, stable_graph::NodeIndex};

#[derive(Debug, Clone)]
pub struct PhyloTree {
pub(crate) struct PhyloTree {
pub tree: crate::Tree,
pub nodes: HashMap<usize, NodeIndex>,
n_nodes: usize,
}

impl PhyloTree {
pub fn build(leafs: &Vec<String>) -> PhyloTree {
pub fn build(leafs: &[String]) -> PhyloTree {
let mut tree: petgraph::Graph<String, f64, petgraph::Undirected> =
UnGraph::new_undirected();
let mut nodes = HashMap::new();
Expand Down

0 comments on commit 2f5ab03

Please sign in to comment.