From 2f5ab03a46200e4609412389e57975874c35a538 Mon Sep 17 00:00:00 2001 From: Curro Campuzano Date: Wed, 20 Mar 2024 20:08:07 +0100 Subject: [PATCH] Make a minimal functional API --- Cargo.lock | 7 +++++ Cargo.toml | 1 + src/bin.rs | 7 +++-- src/distances.rs | 8 +++--- src/hybrid_nj/algorithm.rs | 5 ++-- src/lib.rs | 56 ++++++++++++++++++------------------- src/naive_nj/algorithm.rs | 6 ++-- src/naive_nj/mod.rs | 10 +++---- src/naive_nj/phylo_tree.rs | 2 +- src/property_tests/tests.rs | 4 +-- src/rapid_nj/mod.rs | 6 ++-- src/rapid_nj/phylo_tree.rs | 4 +-- 12 files changed, 62 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61f9ade..a126792 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,6 +50,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" + [[package]] name = "autocfg" version = "1.1.0" @@ -406,6 +412,7 @@ checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" name = "speedytree" version = "0.1.0" dependencies = [ + "anyhow", "bit-set", "bit-vec", "bitvec", diff --git a/Cargo.toml b/Cargo.toml index 2aa631a..0cf77b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ path = "src/lib.rs" name = "speedytree" path = "src/bin.rs" [dependencies] +anyhow = "1.0.81" bit-set = "0.5.3" bit-vec = "0.6.3" bitvec = "1.0.1" diff --git a/src/bin.rs b/src/bin.rs index 9216931..73965b1 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,3 +1,4 @@ +extern crate speedytree; use clap::Parser; /// # speedytree /// `speedytree` is a command line tool for quickly creating a directory tree. 
@@ -9,7 +10,7 @@ use hybrid_nj::neighbor_joining; use speedytree::hybrid_nj; use speedytree::distances::DistanceMatrix; -use speedytree::naive_nj::naive_neighbor_joining; +use speedytree::naive_nj::canonical_neighbor_joining; use speedytree::newick::to_newick; use speedytree::rapid_nj::rapid_nj; use std::{ @@ -117,13 +118,13 @@ pub fn run(config: Config) { .unwrap(); let reader = io::stdin().lock(); - let d = DistanceMatrix::build_from_phylip(reader).unwrap_or_else(|err| { + let d = DistanceMatrix::read_from_phylip(reader).unwrap_or_else(|err| { eprintln!("{err}"); process::exit(1); }); let d = match config.algo { - Algorithm::Naive => naive_neighbor_joining(d), + Algorithm::Naive => canonical_neighbor_joining(d), Algorithm::RapidNJ => rapid_nj(d, config.chunk_size), Algorithm::Hybrid => { let naive_steps = d.size() * config.naive_percentage / 100; diff --git a/src/distances.rs b/src/distances.rs index 80894a8..7ee6806 100644 --- a/src/distances.rs +++ b/src/distances.rs @@ -13,7 +13,7 @@ pub struct DistanceMatrix { /// Distance matrix from a phylip file impl DistanceMatrix { - pub fn build_from_phylip(mut reader: R) -> ResultBox + pub fn read_from_phylip(mut reader: R) -> ResultBox where R: io::BufRead, { @@ -41,7 +41,7 @@ impl DistanceMatrix { self.matrix.len() } /// Permutate the distance matrix for testing purposes - pub fn permutate(&mut self) { + pub(crate) fn permutate(&mut self) { let mut rng = rand::thread_rng(); let mut perm = (0..self.size()).collect::>(); perm.shuffle(&mut rng); @@ -59,7 +59,7 @@ impl DistanceMatrix { self.names = new_names; } /// Example from Wikipedia, https://en.wikipedia.org/wiki/Neighbor_joining - pub fn wikipedia_example() -> DistanceMatrix { + pub(crate) fn wikipedia_example() -> DistanceMatrix { DistanceMatrix { matrix: vec![ vec![0.0, 5.0, 9.0, 9.0, 8.0], @@ -95,7 +95,7 @@ D 9.0 10.0 8.0 0.0 " .as_bytes(); // run function - let distance_matrix = DistanceMatrix::build_from_phylip::<&[u8]>(input).unwrap(); + let distance_matrix = 
DistanceMatrix::read_from_phylip::<&[u8]>(input).unwrap(); // check result assert_eq!( distance_matrix.matrix, diff --git a/src/hybrid_nj/algorithm.rs b/src/hybrid_nj/algorithm.rs index 5f9f212..c7ce16c 100644 --- a/src/hybrid_nj/algorithm.rs +++ b/src/hybrid_nj/algorithm.rs @@ -2,7 +2,6 @@ use crate::{ distances::DistanceMatrix, naive_nj::DataNaiveNJ, rapid_nj::DataRapidNJ, ResultBox, Tree, }; -/// Main function of the crate /// This approach is a hybrid between the naive neighbor joining and the rapid neighbor joining. /// If `naive_iters` is greater than n, then this function calls `naive_neighbor_joining` instead. /// If `naive_iters` is less than 4, then this function calls `rapid_nj` instead. @@ -19,10 +18,10 @@ pub fn neighbor_joining( chunk_size: usize, ) -> ResultBox { if dist.size() < 4 || naive_iters >= dist.size() { - return crate::naive_neighbor_joining(dist); + return crate::naive_nj::canonical_neighbor_joining(dist); } if naive_iters < 4 { - return crate::rapid_nj(dist, chunk_size); + return crate::rapid_nj::rapid_nj(dist, chunk_size); } let mut q = crate::rapid_nj::QMatrix::from(&dist); let mut t = crate::rapid_nj::PhyloTree::build(&dist.names); diff --git a/src/lib.rs b/src/lib.rs index 98529e3..cfe4313 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,41 +1,41 @@ -//! speedytree: Command line tool for Neighbor Joining of biological sequences -//! -//! It implements different heuristics for fast Neighbor-Joining. -//! -//! 1. Naive Neighbor-Joining -//! 2. RapidNJ -//! 3. Hybrid +//! Canonical and RapidNJ implementations of Neighbor-joining in Rust //! +//! Provides Rust implementation of the Canonical algorithm and something in the spirit of RapidNJ but with B-trees. Work in progress. +//! A minimal example is provided here. +//! ``` +//! use speedytree::distances::DistanceMatrix; +//! use speedytree::canonical_neighbor_joining; +//! use speedytree::rapid_nj_neighbor_joining; +//! 
use speedytree::robinson_foulds; +//! // Raw Phylip format +//!let input = "5 +//! a 0 5 9 9 8 +//! b 5 0 10 10 9 +//! c 9 10 0 8 7 +//! d 9 10 8 0 3 +//! e 8 9 7 3 0 +//!".as_bytes(); +//!let distances = DistanceMatrix::read_from_phylip(input).unwrap(); +//! // Use canonical algorithm +//!let tree1 = canonical_neighbor_joining(distances.clone()).unwrap(); +//! // Use RapidNJ with B-trees +//!let tree2 = rapid_nj_neighbor_joining(distances.clone(), 2).unwrap(); +//! assert_eq!(robinson_foulds(tree1, tree2, 5), 0); +//! ``` -#![warn(missing_docs)] -/// Hybrid neighbor joining algorithm -/// This approach is a hybrid between the naive neighbor joining and the rapid neighbor joining. -/// The idea is to use the rapidnj heuristic first to potentially stop the algorithm early, -/// and then use the naive neighbor joining to finish the algorithm, which is faster -/// in practice, but performs more comparisons in theory. -/// However, both algorithms are O(n^3), so the difference is not that big. 
pub mod hybrid_nj; /// Property tests for neighbor joining algorithm pub mod property_tests; - -/// Configuration of the program -pub mod configuration; pub mod distances; pub mod naive_nj; pub mod newick; pub mod rapid_nj; -use hybrid_nj::neighbor_joining; - -use crate::distances::DistanceMatrix; -use crate::naive_nj::naive_neighbor_joining; -use crate::newick::to_newick; -use crate::rapid_nj::rapid_nj; -use std::{ - error, - io::{self, Write}, - process, -}; +use std::error; type ResultBox = std::result::Result>; type Tree = petgraph::graph::UnGraph; +pub use naive_nj::canonical_neighbor_joining as canonical_neighbor_joining; +pub use rapid_nj::rapid_nj as rapid_nj_neighbor_joining; +pub use property_tests::tree_distances::robinson_foulds as robinson_foulds; +pub use property_tests::tree_distances::branch_score as branch_score; \ No newline at end of file diff --git a/src/naive_nj/algorithm.rs b/src/naive_nj/algorithm.rs index 046ea8a..d479f83 100644 --- a/src/naive_nj/algorithm.rs +++ b/src/naive_nj/algorithm.rs @@ -2,7 +2,7 @@ use crate::{distances::DistanceMatrix, ResultBox, Tree}; use super::{phylo_tree::PhyloTree, qmatrix::QMatrix}; -pub fn naive_neighbor_joining(dist: DistanceMatrix) -> ResultBox { +pub fn canonical_neighbor_joining(dist: DistanceMatrix) -> ResultBox { let mut t = PhyloTree::build(&dist.names); let mut q = QMatrix::build(dist); while q.n_leaves() > 3 { @@ -16,7 +16,7 @@ pub fn naive_neighbor_joining(dist: DistanceMatrix) -> ResultBox { Ok(terminate_nj(t, q)) } -pub fn terminate_nj(tree: PhyloTree, q: QMatrix) -> Tree { +pub(crate) fn terminate_nj(tree: PhyloTree, q: QMatrix) -> Tree { let (i, j, m) = (tree.nodes[&0], tree.nodes[&1], tree.nodes[&2]); let mut tree = tree.tree; @@ -54,7 +54,7 @@ mod tests { ], }; - let phylo = naive_neighbor_joining(d); + let phylo = canonical_neighbor_joining(d); assert!(phylo.is_ok()); let tree = phylo.unwrap(); diff --git a/src/naive_nj/mod.rs b/src/naive_nj/mod.rs index 44964da..add5d66 100644 --- 
a/src/naive_nj/mod.rs +++ b/src/naive_nj/mod.rs @@ -5,11 +5,11 @@ mod qmatrix; // PhyloTree is a helper struct for the Naive Neighbor Joining algorithm. mod phylo_tree; // Export the public interface of the Naive Neighbor Joining algorithm. -pub use algorithm::naive_neighbor_joining; -pub use algorithm::terminate_nj; -pub use phylo_tree::PhyloTree; -pub use qmatrix::QMatrix; -pub struct DataNaiveNJ { +pub use algorithm::canonical_neighbor_joining; +pub(crate) use algorithm::terminate_nj; +pub(crate) use phylo_tree::PhyloTree; +pub(crate) use qmatrix::QMatrix; +pub(crate) struct DataNaiveNJ { pub qmatrix: qmatrix::QMatrix, pub phylo_tree: phylo_tree::PhyloTree, } diff --git a/src/naive_nj/phylo_tree.rs b/src/naive_nj/phylo_tree.rs index 8a2055c..3eb1fdd 100644 --- a/src/naive_nj/phylo_tree.rs +++ b/src/naive_nj/phylo_tree.rs @@ -20,7 +20,7 @@ impl PhyloTree { nodes, } } - pub fn build(leafs: &Vec) -> PhyloTree { + pub fn build(leafs: &[String]) -> PhyloTree { let mut tree: petgraph::Graph = UnGraph::new_undirected(); let mut nodes = HashMap::new(); diff --git a/src/property_tests/tests.rs b/src/property_tests/tests.rs index bf5e7f5..c106cc5 100644 --- a/src/property_tests/tests.rs +++ b/src/property_tests/tests.rs @@ -8,7 +8,7 @@ fn assert_equal_tree(a: &crate::Tree, b: &crate::Tree, i: usize) { #[test] fn test_random_additive_binary_trees_naive() { use crate::{ - naive_nj::naive_neighbor_joining, + naive_nj::canonical_neighbor_joining, property_tests::random_additive_tree::{ distance_matrix_from_tree, random_unrooted_binary_tree, }, @@ -16,7 +16,7 @@ fn test_random_additive_binary_trees_naive() { for i in 4..20 { let original_tree = random_unrooted_binary_tree(i); let d = distance_matrix_from_tree(original_tree.clone()); - let tree = naive_neighbor_joining(d).unwrap(); + let tree = canonical_neighbor_joining(d).unwrap(); assert_equal_tree(&original_tree, &tree, i) } } diff --git a/src/rapid_nj/mod.rs b/src/rapid_nj/mod.rs index a79cf8b..b649fe8 100644 --- 
a/src/rapid_nj/mod.rs +++ b/src/rapid_nj/mod.rs @@ -3,10 +3,10 @@ mod node; mod phylo_tree; mod qmatrix; pub use algorithm::rapid_nj; -pub use phylo_tree::PhyloTree; -pub use qmatrix::QMatrix; +pub(crate) use phylo_tree::PhyloTree; +pub(crate) use qmatrix::QMatrix; -pub struct DataRapidNJ { +pub(crate) struct DataRapidNJ { pub qmatrix: QMatrix, pub phylo_tree: phylo_tree::PhyloTree, } diff --git a/src/rapid_nj/phylo_tree.rs b/src/rapid_nj/phylo_tree.rs index 636d4a4..37b2f17 100644 --- a/src/rapid_nj/phylo_tree.rs +++ b/src/rapid_nj/phylo_tree.rs @@ -3,14 +3,14 @@ use std::collections::HashMap; use petgraph::{graph::UnGraph, stable_graph::NodeIndex}; #[derive(Debug, Clone)] -pub struct PhyloTree { +pub(crate) struct PhyloTree { pub tree: crate::Tree, pub nodes: HashMap, n_nodes: usize, } impl PhyloTree { - pub fn build(leafs: &Vec) -> PhyloTree { + pub fn build(leafs: &[String]) -> PhyloTree { let mut tree: petgraph::Graph = UnGraph::new_undirected(); let mut nodes = HashMap::new();