Skip to content

Commit

Permalink
apply clang-format to dbg_sshash
Browse files Browse the repository at this point in the history
  • Loading branch information
adamant-pwn committed Mar 27, 2024
1 parent a7ad381 commit d343e96
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 69 deletions.
107 changes: 64 additions & 43 deletions metagraph/src/graph/representation/hash/dbg_sshash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,44 +6,55 @@
namespace mtg {
namespace graph {

constexpr DeBruijnGraph::node_index sshash_to_graph_index(uint64_t idx) { return idx + 1; }
constexpr uint64_t graph_index_to_sshash(DeBruijnGraph::node_index idx) { return idx - 1; }
constexpr DeBruijnGraph::node_index sshash_to_graph_index(uint64_t idx) {
return idx + 1;
}
constexpr uint64_t graph_index_to_sshash(DeBruijnGraph::node_index idx) {
return idx - 1;
}

DBGSSHash::DBGSSHash(size_t k):k_(k) {}
DBGSSHash::DBGSSHash(size_t k) : k_(k) {}

DBGSSHash::DBGSSHash(std::string const& input_filename, size_t k, Mode mode):k_(k), mode_(mode) {
DBGSSHash::DBGSSHash(std::string const& input_filename, size_t k, Mode mode)
: k_(k), mode_(mode) {
sshash::build_configuration build_config;
build_config.k = k;
// quick fix for value of m... k/2 but odd
build_config.m = (k_+1)/2;
if(build_config.m % 2 == 0) build_config.m++;
build_config.m = (k_ + 1) / 2;
if (build_config.m % 2 == 0)
build_config.m++;
dict_.build(input_filename, build_config);
}

std::string DBGSSHash::file_extension() const { return kExtension; }
size_t DBGSSHash::get_k() const { return k_; }
DeBruijnGraph::Mode DBGSSHash::get_mode() const { return mode_; }
std::string DBGSSHash::file_extension() const {
return kExtension;
}
size_t DBGSSHash::get_k() const {
return k_;
}
DeBruijnGraph::Mode DBGSSHash::get_mode() const {
return mode_;
}

void DBGSSHash::add_sequence(std::string_view sequence,
const std::function<void(node_index)> &on_insertion) {
const std::function<void(node_index)>& on_insertion) {
throw std::runtime_error("adding sequences not implemented");
}

void DBGSSHash::map_to_nodes(std::string_view sequence,
const std::function<void(node_index)> &callback,
const std::function<bool()> &terminate) const {
const std::function<void(node_index)>& callback,
const std::function<bool()>& terminate) const {
if (mode_ != CANONICAL) {
map_to_nodes_sequentially(sequence, callback, terminate);
} else {
map_to_nodes_with_rc(sequence, [&](node_index node, bool) {
callback(node);
}, terminate);
map_to_nodes_with_rc(
sequence, [&](node_index node, bool) { callback(node); }, terminate);
}
}

void DBGSSHash ::map_to_nodes_sequentially(std::string_view sequence,
const std::function<void(node_index)> &callback,
const std::function<bool()> &terminate) const {
const std::function<void(node_index)>& callback,
const std::function<bool()>& terminate) const {
if (terminate() || sequence.size() < k_)
return;

Expand All @@ -57,12 +68,12 @@ void DBGSSHash ::map_to_nodes_sequentially(std::string_view sequence,
}

void DBGSSHash ::map_to_nodes_with_rc(std::string_view sequence,
const std::function<void(node_index, bool)> &callback,
const std::function<bool()> &terminate) const {
const std::function<void(node_index, bool)>& callback,
const std::function<bool()>& terminate) const {
sshash::streaming_query_regular_parsing<kmer_t> streamer(&dict_);
streamer.start();
for (size_t i = 0; i + k_ <= sequence.size() && !terminate(); ++i) {
const char *kmer = sequence.data() + i;
const char* kmer = sequence.data() + i;
auto res = streamer.lookup_advanced(kmer);
callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation);
}
Expand All @@ -73,7 +84,8 @@ DBGSSHash::node_index DBGSSHash::traverse(node_index node, char next_char) const
kmer_t new_kmer = sshash::util::string_to_uint_kmer<kmer_t>(string_kmer.c_str(), k_);
new_kmer.drop_char();
new_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(next_char));
uint64_t sshash_id = dict_.lookup_advanced_uint(new_kmer, /*check_reverse_complement*/false).kmer_id;
uint64_t sshash_id
= dict_.lookup_advanced_uint(new_kmer, /*check_reverse_complement*/ false).kmer_id;
return sshash_to_graph_index(sshash_id);
}

Expand All @@ -82,24 +94,25 @@ DBGSSHash::node_index DBGSSHash::traverse_back(node_index node, char prev_char)
kmer_t new_kmer = sshash::util::string_to_uint_kmer<kmer_t>(string_kmer.c_str(), k_);
new_kmer.append_char(prev_char);
new_kmer.take_chars(k_);
uint64_t sshash_id = dict_.lookup_advanced_uint(new_kmer, /*check_reverse_complement*/false).kmer_id;
uint64_t sshash_id
= dict_.lookup_advanced_uint(new_kmer, /*check_reverse_complement*/ false).kmer_id;
return sshash_to_graph_index(sshash_id);
}

void DBGSSHash ::adjacent_outgoing_nodes(node_index node,
const std::function<void(node_index)> &callback) const {
const std::function<void(node_index)>& callback) const {
assert(node > 0 && node <= num_nodes());
call_outgoing_kmers(node, [&](auto child, char) { callback(child); });
}

void DBGSSHash ::adjacent_incoming_nodes(node_index node,
const std::function<void(node_index)> &callback) const {
const std::function<void(node_index)>& callback) const {
assert(node > 0 && node <= num_nodes());
call_incoming_kmers(node, [&](auto parent, char) { callback(parent); });
}

void DBGSSHash ::call_outgoing_kmers(node_index node,
const OutgoingEdgeCallback &callback) const {
const OutgoingEdgeCallback& callback) const {
assert(node > 0 && node <= num_nodes());
std::string kmer = DBGSSHash::get_node_sequence(node);
sshash::neighbourhood<kmer_t> nb = dict_.kmer_forward_neighbours(kmer.c_str(), false);
Expand All @@ -111,7 +124,7 @@ void DBGSSHash ::call_outgoing_kmers(node_index node,
}

void DBGSSHash ::call_incoming_kmers(node_index node,
const IncomingEdgeCallback &callback) const {
const IncomingEdgeCallback& callback) const {
assert(node > 0 && node <= num_nodes());
std::string kmer = DBGSSHash::get_node_sequence(node);
sshash::neighbourhood<kmer_t> nb = dict_.kmer_backward_neighbours(kmer.c_str(), false);
Expand All @@ -122,27 +135,31 @@ void DBGSSHash ::call_incoming_kmers(node_index node,
}
}

void DBGSSHash ::call_outgoing_kmers_with_rc(node_index node,
const std::function<void(node_index, char, bool)> &callback) const {
void DBGSSHash ::call_outgoing_kmers_with_rc(
node_index node,
const std::function<void(node_index, char, bool)>& callback) const {
assert(node > 0 && node <= num_nodes());
std::string kmer = DBGSSHash::get_node_sequence(node);
sshash::neighbourhood<kmer_t> nb = dict_.kmer_forward_neighbours(kmer.c_str(), false);
for (size_t i = 0; i < kmer_t::alphabet_size; i++) {
if (nb.forward[i].kmer_id != sshash::constants::invalid_uint64) {
callback(sshash_to_graph_index(nb.forward[i].kmer_id), kmer_t::alphabet[i], nb.forward[i].kmer_orientation);
callback(sshash_to_graph_index(nb.forward[i].kmer_id), kmer_t::alphabet[i],
nb.forward[i].kmer_orientation);
}
}
}


void DBGSSHash ::call_incoming_kmers_with_rc(node_index node,
const std::function<void(node_index, char, bool)> &callback) const {
void DBGSSHash ::call_incoming_kmers_with_rc(
node_index node,
const std::function<void(node_index, char, bool)>& callback) const {
assert(node > 0 && node <= num_nodes());
std::string kmer = DBGSSHash::get_node_sequence(node);
sshash::neighbourhood<kmer_t> nb = dict_.kmer_backward_neighbours(kmer.c_str(), false);
for (size_t i = 0; i < kmer_t::alphabet_size; i++) {
if (nb.backward[i].kmer_id != sshash::constants::invalid_uint64) {
callback(sshash_to_graph_index(nb.backward[i].kmer_id), kmer_t::alphabet[i], nb.backward[i].kmer_orientation);
callback(sshash_to_graph_index(nb.backward[i].kmer_id), kmer_t::alphabet[i],
nb.backward[i].kmer_orientation);
}
}
}
Expand All @@ -164,7 +181,7 @@ size_t DBGSSHash::indegree(node_index node) const {
}

void DBGSSHash::call_kmers(
const std::function<void(node_index, const std::string &)> &callback) const {
const std::function<void(node_index, const std::string&)>& callback) const {
for (size_t node_idx = 1; node_idx <= num_nodes(); ++node_idx) {
callback(node_idx, get_node_sequence(node_idx));
}
Expand All @@ -174,7 +191,8 @@ DBGSSHash::node_index DBGSSHash::kmer_to_node(std::string_view kmer) const {
return num_nodes() ? dict_.lookup(kmer.begin(), false) + 1 : npos;
}

std::pair<DBGSSHash::node_index, bool> DBGSSHash::kmer_to_node_with_rc(std::string_view kmer) const {
std::pair<DBGSSHash::node_index, bool>
DBGSSHash::kmer_to_node_with_rc(std::string_view kmer) const {
if (!num_nodes())
return std::make_pair(npos, false);

Expand All @@ -189,24 +207,25 @@ std::string DBGSSHash::get_node_sequence(node_index node) const {
return str_kmer;
}

void DBGSSHash::serialize(std::ostream &out) const {
//TODO
void DBGSSHash::serialize(std::ostream& out) const {
// TODO
throw std::runtime_error("serialize to stream not implemented");
}

void DBGSSHash::serialize(const std::string &filename) const {
void DBGSSHash::serialize(const std::string& filename) const {
std::string suffixed_filename = utils::make_suffix(filename, kExtension);

// TODO: fix this in the essentials library. for some reason, it's saver takes a non-const ref
essentials::save(const_cast<sshash::dictionary<kmer_t>&>(dict_), suffixed_filename.c_str());
essentials::save(const_cast<sshash::dictionary<kmer_t>&>(dict_),
suffixed_filename.c_str());
}

bool DBGSSHash::load(std::istream &in) {
bool DBGSSHash::load(std::istream& in) {
throw std::runtime_error("load from stream not implemented");
return false;
}

bool DBGSSHash::load(const std::string &filename) {
bool DBGSSHash::load(const std::string& filename) {
std::string suffixed_filename = utils::make_suffix(filename, kExtension);
uint64_t num_bytes_read = essentials::load(dict_, suffixed_filename.c_str());
if (common::get_verbose()) {
Expand All @@ -219,16 +238,18 @@ bool DBGSSHash::load(const std::string &filename) {
return true;
}

bool DBGSSHash::operator==(const DeBruijnGraph &other) const {
bool DBGSSHash::operator==(const DeBruijnGraph& other) const {
throw std::runtime_error("operator== not implemented");
return false;
}

const std::string DBGSSHash::alphabet_ = "ACGT";
const std::string &DBGSSHash::alphabet() const {
const std::string& DBGSSHash::alphabet() const {
return alphabet_;
}

uint64_t DBGSSHash::num_nodes() const { return dict_.size(); }
uint64_t DBGSSHash::num_nodes() const {
return dict_.size();
}
} // namespace graph
} // namespace mtg
55 changes: 29 additions & 26 deletions metagraph/src/graph/representation/hash/dbg_sshash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,59 +14,60 @@ namespace mtg::graph {

class DBGSSHash : public DeBruijnGraph {
using kmer_t = sshash::dna_uint_kmer_t<uint64_t>;

public:
explicit DBGSSHash(size_t k);
DBGSSHash(std::string const& input_filename, size_t k, Mode mode = BASIC);

// SequenceGraph overrides
// SequenceGraph overrides
void add_sequence(
std::string_view sequence,
const std::function<void(node_index)> &on_insertion = [](node_index) {}) override;
const std::function<void(node_index)>& on_insertion = [](node_index) {}) override;

void map_to_nodes(
std::string_view sequence,
const std::function<void(node_index)> &callback,
const std::function<bool()> &terminate = []() { return false; }) const override;
const std::function<void(node_index)>& callback,
const std::function<bool()>& terminate = []() { return false; }) const override;

void map_to_nodes_sequentially(
std::string_view sequence,
const std::function<void(node_index)> &callback,
const std::function<bool()> &terminate = []() { return false; }) const override;
const std::function<void(node_index)>& callback,
const std::function<bool()>& terminate = []() { return false; }) const override;

void map_to_nodes_with_rc(
std::string_view sequence,
const std::function<void(node_index, bool)> &callback,
const std::function<bool()> &terminate = []() { return false; }) const;
const std::function<void(node_index, bool)>& callback,
const std::function<bool()>& terminate = []() { return false; }) const;

void adjacent_outgoing_nodes(node_index node,
const std::function<void(node_index)> &callback) const override;
const std::function<void(node_index)>& callback) const override;

void adjacent_incoming_nodes(node_index node,
const std::function<void(node_index)> &callback) const override;
const std::function<void(node_index)>& callback) const override;

uint64_t num_nodes() const override;

bool load(std::istream &in);
bool load(const std::string &filename) override;
bool load(std::istream& in);
bool load(const std::string& filename) override;

void serialize(std::ostream &out) const;
void serialize(const std::string &filename) const override;
void serialize(std::ostream& out) const;
void serialize(const std::string& filename) const override;

static constexpr auto kExtension = ".sshashdbg";
std::string file_extension() const override;

std::string get_node_sequence(node_index node) const override;

// DeBruijnGraph overrides
size_t get_k() const override ;
// DeBruijnGraph overrides
size_t get_k() const override;

// TODO: add the support for the canonical mode
Mode get_mode() const override;

node_index traverse(node_index node, char next_char) const override;
node_index traverse_back(node_index node, char prev_char) const override;

void call_kmers(const std::function<void(node_index, const std::string &)> &callback) const override;
void call_kmers(const std::function<void(node_index, const std::string&)>& callback) const override;

size_t outdegree(node_index) const override;

Expand All @@ -76,21 +77,23 @@ class DBGSSHash : public DeBruijnGraph {
std::pair<node_index, bool> kmer_to_node_with_rc(std::string_view kmer) const;

void call_outgoing_kmers(node_index node,
const OutgoingEdgeCallback &callback) const override;
const OutgoingEdgeCallback& callback) const override;

void call_outgoing_kmers_with_rc(node_index node,
const std::function<void(node_index, char, bool)> &callback) const;
void call_outgoing_kmers_with_rc(
node_index node,
const std::function<void(node_index, char, bool)>& callback) const;

void call_incoming_kmers(node_index node,
const IncomingEdgeCallback &callback) const override;
const IncomingEdgeCallback& callback) const override;

void call_incoming_kmers_with_rc(node_index node,
const std::function<void(node_index, char, bool)> &callback) const;
void call_incoming_kmers_with_rc(
node_index node,
const std::function<void(node_index, char, bool)>& callback) const;


bool operator==(const DeBruijnGraph &other) const override;
bool operator==(const DeBruijnGraph& other) const override;

const std::string &alphabet() const override;
const std::string& alphabet() const override;

const sshash::dictionary<kmer_t>& data() const { return dict_; }

Expand Down

0 comments on commit d343e96

Please sign in to comment.