Skip to content

Commit

Permalink
Split into crate & binary
Browse files Browse the repository at this point in the history
  • Loading branch information
currocam authored Mar 21, 2024
2 parents 997756a + 99b8ed3 commit 064f243
Show file tree
Hide file tree
Showing 17 changed files with 405 additions and 261 deletions.
12 changes: 7 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
[package]
name = "speedytree"
version = "0.1.0"
authors = ["Curro Campuzano <[email protected]>"]
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "speedytree"
path = "src/lib.rs"

[[bin]]
name = "speedytree"
path = "src/bin.rs"
[dependencies]
bit-set = "0.5.3"
bit-vec = "0.6.3"
Expand All @@ -17,7 +23,3 @@ petgraph = "0.6.4"
rand = "0.8.5"
rayon = "1.8.0"
rb_tree = "0.5.0"
[profile.release]
lto = true
codegen-units = 1
panic = "abort"
150 changes: 150 additions & 0 deletions src/bin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
extern crate speedytree;
use clap::Parser;
/// # speedytree
/// `speedytree` is a command line tool that builds a tree from a distance matrix
/// with the neighbor-joining method.
/// It reads a distance matrix in PHYLIP format from standard input.
/// It solves it with the naive (canonical), RapidNJ or hybrid algorithm,
/// depending on the command line flags.
/// It writes the resulting tree in Newick format to standard output.
use speedytree::DistanceMatrix;
use speedytree::{Canonical, Hybrid, NeighborJoiningSolver, RapidBtrees};

use std::{
error,
io::{self, Write},
process,
};
type ResultBox<T> = std::result::Result<T, Box<dyn error::Error>>;

/// Define the configuration of the program
/// It contains the algorithm to use, the number of threads to use,
/// the chunk size and the naive percentage.
/// It is produced by `Config::build` from the parsed command line arguments.
#[derive(Debug)]
pub struct Config {
/// Neighbor-joining variant selected from the command line flags.
pub(crate) algo: Algorithm,
/// Number of threads to use (taken from the `cores` argument).
pub(crate) threads: usize,
/// Chunk size (taken from the `chunk_size` argument); `Config::build` rejects 0.
pub(crate) chunk_size: usize,
/// Naive percentage (taken from the `naive_percentage` argument);
/// `Config::build` rejects 0 and 100.
pub(crate) naive_percentage: usize,
}

impl Config {
    /// Build the configuration from the command line arguments.
    ///
    /// The algorithm is `Naive` when the `naive` flag is set, `RapidNJ` when the
    /// `rapidnj` flag is set, and `Hybrid` otherwise (including when no
    /// algorithm flag is given at all).
    ///
    /// # Errors
    ///
    /// Returns an error when the chunk size is 0, or when the naive percentage
    /// is 0, 100 or greater than 100.
    pub fn build(args: Args) -> ResultBox<Config> {
        // `naive` wins over `rapidnj`; hybrid is the fallback.
        let algo = match (args.naive, args.rapidnj) {
            (true, _) => Algorithm::Naive,
            (false, true) => Algorithm::RapidNJ,
            (false, false) => Algorithm::Hybrid,
        };

        if args.chunk_size == 0 {
            return Err("Chunk size cannot be 0".into());
        }

        // The percentage must leave work for both halves of the hybrid
        // algorithm, so only values strictly between 0 and 100 are accepted.
        match args.naive_percentage {
            0 => return Err("Naive percentage cannot be 0".into()),
            100 => return Err("Naive percentage cannot be 100".into()),
            pct if pct > 100 => {
                return Err("Naive percentage cannot be greater than 100".into());
            }
            _ => {}
        }

        Ok(Config {
            algo,
            threads: args.cores,
            chunk_size: args.chunk_size,
            naive_percentage: args.naive_percentage,
        })
    }
}

/// Define the command line arguments
// NOTE: with clap's derive API the `///` comments on the fields below are used
// as the `--help` text, so rewording them changes the program's output.
// Maintainer notes are therefore written as plain `//` comments.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Args {
// The three algorithm flags are declared mutually exclusive through
// `conflicts_with`; when none is given, `Config::build` selects the hybrid
// algorithm.
/// Use the rapidnj heuristic
#[arg(long, conflicts_with = "hybrid", conflicts_with = "naive")]
rapidnj: bool,
/// Use the naive algorithm
#[arg(long, conflicts_with = "hybrid", conflicts_with = "rapidnj")]
naive: bool,
// `Config::build` never reads this field: hybrid is what remains when neither
// `naive` nor `rapidnj` is set.
/// Use the hybrid heuristic
#[arg(long, conflicts_with = "rapidnj", conflicts_with = "naive")]
hybrid: bool,
// Stored as `Config::threads` by `Config::build`.
/// Number of cores to use
/// Default: 1
#[arg(short, long, default_value = "1")]
cores: usize,
// Declared as conflicting with `naive`; `Config::build` rejects a value of 0.
/// Chunk size to be handled by each thread
/// Default: 30
#[arg(long, default_value = "30", conflicts_with = "naive")]
chunk_size: usize,
// Declared as conflicting with `naive` and `rapidnj`; `Config::build` rejects
// the values 0 and 100.
/// Percentage of the matrix to be handled by the naive algorithm
/// Default: 90
#[arg(
long,
default_value = "90",
conflicts_with = "naive",
conflicts_with = "rapidnj"
)]
naive_percentage: usize,
}

/// Available algorithms in the program
///
/// `Config::build` picks the variant from the command line flags and `run`
/// matches on it to choose the solver.
#[derive(Debug, Clone)]
pub enum Algorithm {
/// Naive neighbor joining (solved with `NeighborJoiningSolver::<Canonical>`)
Naive,
/// Rapid neighbor joining (solved with `NeighborJoiningSolver::<RapidBtrees>`)
RapidNJ,
/// Hybrid neighbor joining (solved with `NeighborJoiningSolver::<Hybrid>`)
Hybrid,
}
/// Run the neighbor-joining pipeline described by `config`.
///
/// Sets up the global rayon thread pool with `config.threads` threads, reads a
/// distance matrix in PHYLIP format from standard input, solves it with the
/// algorithm selected in `config.algo` and writes the resulting tree in Newick
/// format to standard output.
///
/// Any failure along the way is printed to standard error and terminates the
/// process with exit code 1.
pub fn run(config: Config) {
    // The pool size comes from user input (`--cores`), so a failure to build
    // the pool is reported like every other error below instead of panicking.
    rayon::ThreadPoolBuilder::new()
        .num_threads(config.threads)
        .build_global()
        .unwrap_or_else(|err| {
            eprintln!("{err}");
            process::exit(1);
        });

    let reader = io::stdin().lock();
    let matrix = DistanceMatrix::read_from_phylip(reader).unwrap_or_else(|err| {
        eprintln!("{err}");
        process::exit(1);
    });

    let solved = match config.algo {
        Algorithm::Naive => NeighborJoiningSolver::<Canonical>::default(matrix).solve(),
        Algorithm::RapidNJ => {
            NeighborJoiningSolver::<RapidBtrees>::build(matrix, config.chunk_size).solve()
        }
        Algorithm::Hybrid => {
            // Computed before `matrix` is moved into the solver.
            let naive_steps = matrix.size() * config.naive_percentage / 100;
            NeighborJoiningSolver::<Hybrid>::build(matrix, config.chunk_size, naive_steps).solve()
        }
    };
    let graph = solved.unwrap_or_else(|err| {
        eprintln!("{err}");
        process::exit(1);
    });

    io::stdout()
        .write_all(speedytree::to_newick(&graph).as_bytes())
        .unwrap_or_else(|err| {
            eprintln!("{err}");
            process::exit(1);
        });
}

/// Program entry point: parse the command line, validate it into a `Config`
/// and hand it to `run`. Invalid arguments are reported on standard error and
/// end the process with exit code 1.
fn main() {
    let config = match Config::build(Args::parse()) {
        Ok(config) => config,
        Err(err) => {
            eprintln!("Problem parsing arguments: {err}");
            process::exit(1);
        }
    };
    run(config);
}
81 changes: 0 additions & 81 deletions src/configuration.rs
Original file line number Diff line number Diff line change
@@ -1,81 +0,0 @@
use crate::Algorithm;

use super::ResultBox;
use clap::Parser;
/// Define the configuration of the program
/// It contains the algorithm to use, the number of threads to use,
/// the chunk size and the naive percentage.
/// It is produced by `Config::build` from the parsed command line arguments.
#[derive(Debug)]
pub struct Config {
/// Neighbor-joining variant selected from the command line flags.
pub(crate) algo: Algorithm,
/// Number of threads to use (taken from the `cores` argument).
pub(crate) threads: usize,
/// Chunk size (taken from the `chunk_size` argument); `Config::build` rejects 0.
pub(crate) chunk_size: usize,
/// Naive percentage (taken from the `naive_percentage` argument);
/// `Config::build` rejects 0 and 100.
pub(crate) naive_percentage: usize,
}

impl Config {
    /// Build the configuration from the command line arguments.
    ///
    /// The algorithm is `Naive` when the `naive` flag is set, `RapidNJ` when the
    /// `rapidnj` flag is set, and `Hybrid` otherwise (including when no
    /// algorithm flag is given at all).
    ///
    /// # Errors
    ///
    /// Returns an error when the chunk size is 0, or when the naive percentage
    /// is 0, 100 or greater than 100.
    pub fn build(args: Args) -> ResultBox<Config> {
        // `naive` wins over `rapidnj`; hybrid is the fallback.
        let algo = match (args.naive, args.rapidnj) {
            (true, _) => Algorithm::Naive,
            (false, true) => Algorithm::RapidNJ,
            (false, false) => Algorithm::Hybrid,
        };

        if args.chunk_size == 0 {
            return Err("Chunk size cannot be 0".into());
        }

        // The percentage must leave work for both halves of the hybrid
        // algorithm, so only values strictly between 0 and 100 are accepted.
        match args.naive_percentage {
            0 => return Err("Naive percentage cannot be 0".into()),
            100 => return Err("Naive percentage cannot be 100".into()),
            pct if pct > 100 => {
                return Err("Naive percentage cannot be greater than 100".into());
            }
            _ => {}
        }

        Ok(Config {
            algo,
            threads: args.cores,
            chunk_size: args.chunk_size,
            naive_percentage: args.naive_percentage,
        })
    }
}

/// Define the command line arguments
// NOTE: with clap's derive API the `///` comments on the fields below are used
// as the `--help` text, so rewording them changes the program's output.
// Maintainer notes are therefore written as plain `//` comments.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Args {
// The three algorithm flags are declared mutually exclusive through
// `conflicts_with`; when none is given, `Config::build` selects the hybrid
// algorithm.
/// Use the rapidnj heuristic
#[arg(long, conflicts_with = "hybrid", conflicts_with = "naive")]
rapidnj: bool,
/// Use the naive algorithm
#[arg(long, conflicts_with = "hybrid", conflicts_with = "rapidnj")]
naive: bool,
// `Config::build` never reads this field: hybrid is what remains when neither
// `naive` nor `rapidnj` is set.
/// Use the hybrid heuristic
#[arg(long, conflicts_with = "rapidnj", conflicts_with = "naive")]
hybrid: bool,
// Stored as `Config::threads` by `Config::build`.
/// Number of cores to use
/// Default: 1
#[arg(short, long, default_value = "1")]
cores: usize,
// Declared as conflicting with `naive`; `Config::build` rejects a value of 0.
/// Chunk size to be handled by each thread
/// Default: 30
#[arg(long, default_value = "30", conflicts_with = "naive")]
chunk_size: usize,
// Declared as conflicting with `naive` and `rapidnj`; `Config::build` rejects
// the values 0 and 100.
/// Percentage of the matrix to be handled by the naive algorithm
/// Default: 90
#[arg(
long,
default_value = "90",
conflicts_with = "naive",
conflicts_with = "rapidnj"
)]
naive_percentage: usize,
}
45 changes: 6 additions & 39 deletions src/distances.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use rand::seq::SliceRandom;

use crate::ResultBox;
use std::io::{self};
/// Distance matrix struct
Expand All @@ -13,7 +11,7 @@ pub struct DistanceMatrix {

/// Distance matrix from a phylip file
impl DistanceMatrix {
pub fn build_from_phylip<R>(mut reader: R) -> ResultBox<DistanceMatrix>
pub fn read_from_phylip<R>(mut reader: R) -> ResultBox<DistanceMatrix>
where
R: io::BufRead,
{
Expand All @@ -40,42 +38,11 @@ impl DistanceMatrix {
pub fn size(&self) -> usize {
self.matrix.len()
}
/// Shuffle the distance matrix in place, for testing purposes.
///
/// A random ordering of the indices is drawn once and applied to the rows and
/// columns of the matrix and to the names, so each name keeps its distances.
pub fn permutate(&mut self) {
    let mut rng = rand::thread_rng();
    let mut order: Vec<usize> = (0..self.size()).collect();
    order.shuffle(&mut rng);

    // Entry (i, j) of the shuffled matrix is entry (order[i], order[j]) of the
    // current one.
    let shuffled_matrix: Vec<Vec<_>> = order
        .iter()
        .map(|&row| order.iter().map(|&col| self.matrix[row][col]).collect())
        .collect();
    let shuffled_names: Vec<String> = order
        .iter()
        .map(|&idx| self.names[idx].clone())
        .collect();

    self.matrix = shuffled_matrix;
    self.names = shuffled_names;
}
/// Example from Wikipedia, https://en.wikipedia.org/wiki/Neighbor_joining
pub fn wikipedia_example() -> DistanceMatrix {
DistanceMatrix {
matrix: vec![
vec![0.0, 5.0, 9.0, 9.0, 8.0],
vec![5.0, 0.0, 10.0, 10.0, 9.0],
vec![9.0, 10.0, 0.0, 8.0, 7.0],
vec![9.0, 10.0, 8.0, 0.0, 3.0],
vec![8.0, 9.0, 7.0, 3.0, 0.0],
],
names: vec![
"A".to_string(),
"B".to_string(),
"C".to_string(),
"D".to_string(),
"E".to_string(),
],
/// Build a distance matrix from its rows and the matching names.
///
/// # Errors
///
/// Returns an error when the number of rows differs from the number of
/// names, or when a row does not hold exactly one distance per name.
pub fn build(matrix: Vec<Vec<f64>>, names: Vec<String>) -> ResultBox<DistanceMatrix> {
    if matrix.len() != names.len() {
        return Err("Matrix and names have different lengths".into());
    }
    // Every row needs one entry per name, otherwise the matrix is not square
    // and indexing it by pairs of names would go out of bounds.
    if matrix.iter().any(|row| row.len() != names.len()) {
        return Err("Matrix is not square".into());
    }
    Ok(DistanceMatrix { matrix, names })
}
}

Expand All @@ -95,7 +62,7 @@ D 9.0 10.0 8.0 0.0
"
.as_bytes();
// run function
let distance_matrix = DistanceMatrix::build_from_phylip::<&[u8]>(input).unwrap();
let distance_matrix = DistanceMatrix::read_from_phylip::<&[u8]>(input).unwrap();
// check result
assert_eq!(
distance_matrix.matrix,
Expand Down
5 changes: 2 additions & 3 deletions src/hybrid_nj/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::{
distances::DistanceMatrix, naive_nj::DataNaiveNJ, rapid_nj::DataRapidNJ, ResultBox, Tree,
};

/// Main function of the crate
/// This approach is a hybrid between the naive neighbor joining and the rapid neighbor joining.
/// If `naive_iters` is greater than n, then this function calls `naive_neighbor_joining` instead.
/// If `naive_iters` is less than 4, then this function calls `rapid_nj` instead.
Expand All @@ -19,10 +18,10 @@ pub fn neighbor_joining(
chunk_size: usize,
) -> ResultBox<Tree> {
if dist.size() < 4 || naive_iters >= dist.size() {
return crate::naive_neighbor_joining(dist);
return crate::naive_nj::canonical_neighbor_joining(dist);
}
if naive_iters < 4 {
return crate::rapid_nj(dist, chunk_size);
return crate::rapid_nj::rapid_nj(dist, chunk_size);
}
let mut q = crate::rapid_nj::QMatrix::from(&dist);
let mut t = crate::rapid_nj::PhyloTree::build(&dist.names);
Expand Down
20 changes: 17 additions & 3 deletions src/hybrid_nj/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,26 @@ pub use algorithm::neighbor_joining;

#[cfg(test)]
mod tests {
use crate::distances::DistanceMatrix;

use super::*;
use crate::distances::DistanceMatrix;
#[test]
fn test_example_wikipedia() {
let d = DistanceMatrix::wikipedia_example();
let d = DistanceMatrix {
matrix: vec![
vec![0.0, 5.0, 9.0, 9.0, 8.0],
vec![5.0, 0.0, 10.0, 10.0, 9.0],
vec![9.0, 10.0, 0.0, 8.0, 7.0],
vec![9.0, 10.0, 8.0, 0.0, 3.0],
vec![8.0, 9.0, 7.0, 3.0, 0.0],
],
names: vec![
"A".to_string(),
"B".to_string(),
"C".to_string(),
"D".to_string(),
"E".to_string(),
],
};
let phylo = neighbor_joining(d, 4, 1);
assert!(phylo.is_ok());
let tree = phylo.unwrap();
Expand Down
Loading

0 comments on commit 064f243

Please sign in to comment.