Skip to content

Commit

Permalink
fix instruction-dependent result difference and seg fault due to larg…
Browse files Browse the repository at this point in the history
…e memory usage (#66)
  • Loading branch information
Yan Gao committed Apr 5, 2024
1 parent 8a2dc2f commit e234dce
Show file tree
Hide file tree
Showing 9 changed files with 350 additions and 268 deletions.
18 changes: 13 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,28 @@ default: abpoa
#CC = gcc
OS := $(shell uname)
ARCH := $(shell arch)
EXTRA_FLAGS = -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES
CFLAGS = -Wall -O3 $(EXTRA_FLAGS)
# add -fno-tree-vectorize to avoid certain vectorization errors in O3 optimization
# right now, we are using -O3 for the best performance, and no vectorization errors were found
EXTRA_FLAGS = -Wall -Wno-unused-function -Wno-misleading-indentation -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES # -fno-tree-vectorize

DFLAGS =
# for debug
ifneq ($(debug),)
DFLAGS = -D __DEBUG__
DFLAGS += -D __DEBUG__
endif
ifneq ($(sdebug),)
DFLAGS += -D __SIMD_DEBUG__
endif

# for gdb
ifneq ($(gdb),)
CFLAGS = -Wall -g ${DFLAGS} $(EXTRA_FLAGS)
OPT_FLAGS = -g
else
CFLAGS = -Wall -O3 ${DFLAGS} $(EXTRA_FLAGS)
OPT_FLAGS = -O3
endif

CFLAGS = OPT_FLAGS ${DFLAGS} $(EXTRA_FLAGS)

# for gprof
ifneq ($(pg),)
PG_FLAG = -pg
Expand Down
9 changes: 9 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# To test after commit
1. run through
1. SSE/AVX2/AVX512
2. 16/32bits
3. global/local
4. lg/ag/cg
2. same result
1. SSE vs AVX2 vs AVX512
2. 16bits vs 32 bits
5 changes: 5 additions & 0 deletions include/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
#define ABPOA_HB 0
#define ABPOA_HC 1

#define ABPOA_NONE_VERBOSE 0
#define ABPOA_INFO_VERBOSE 1
#define ABPOA_DEBUG_VERBOSE 2
#define ABPOA_LONG_DEBUG_VERBOSE 3

// NOTE: upper boundary of in_edge_n is pow(2,30)
// for MATCH/MISMATCH: node_id << 34 | query_id << 4 | op
// for INSERTION: query_id << 34 | op_len << 4 | op
Expand Down
2 changes: 2 additions & 0 deletions src/abpoa.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const struct option abpoa_long_opt [] = {

{ "help", 0, NULL, 'h' },
{ "version", 0, NULL, 'v' },
{ "verbose", 1, NULL, 'V'},

{ 0, 0, 0, 0}
};
Expand Down Expand Up @@ -122,6 +123,7 @@ int abpoa_usage(void)

err_printf(" -h --help print this help usage information\n");
err_printf(" -v --version show version number\n");
err_printf(" -V --verbose INT verbose level (0-2). 0: none, 1: information, 2: debug [0]\n");


err_printf("\n");
Expand Down
5 changes: 5 additions & 0 deletions src/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
#define ABPOA_HB 0
#define ABPOA_HC 1

#define ABPOA_NONE_VERBOSE 0
#define ABPOA_INFO_VERBOSE 1
#define ABPOA_DEBUG_VERBOSE 2
#define ABPOA_LONG_DEBUG_VERBOSE 3

// NOTE: upper boundary of in_edge_n is pow(2,30)
// for MATCH/MISMATCH: node_id << 34 | query_id << 4 | op
// for INSERTION: query_id << 34 | op_len << 4 | op
Expand Down
19 changes: 5 additions & 14 deletions src/abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ abpoa_para_t *abpoa_init_para(void) {
abpt->min_w = ABPOA_MIN_POA_WIN;
abpt->progressive_poa = 0; // progressive partial order alignment

abpt->verbose = 0;
abpt->verbose = ABPOA_NONE_VERBOSE;

// abpt->simd_flag = simd_check();

Expand Down Expand Up @@ -199,9 +199,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
// uint8_t *seq1;
for (_i = 0; _i < n_seq; ++_i) {
i = read_id_map[_i]; read_id = exist_n_seq + i; qlen = seq_lens[i]; whole_res.n_cigar = 0, whole_res.m_cigar = 0, whole_res.graph_cigar = 0;
#ifdef __DEBUG__
fprintf(stderr, "seq: # %d\n", i);
#endif
if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
// seq-to-graph alignment and add alignment within each split window
if (_i == 0) ai = 0; else ai = par_c[_i-1];

Expand Down Expand Up @@ -252,10 +250,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
for (; ai < par_c[_i]; ++ai) {
end_tpos = ((par_anchors.a[ai] >> 32) & 0x7fffffff) - k + 1; end_id = tpos_to_node_id[end_tpos];
end_qpos = (uint32_t)par_anchors.a[ai] - k + 1;

#ifdef __DEBUG__
fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
#endif
if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);

res.graph_cigar = 0; res.n_cigar = 0;
abpoa_align_sequence_to_subgraph(ab, abpt, beg_id, end_id, qseq+beg_qpos, end_qpos-beg_qpos, &res);
Expand All @@ -277,9 +272,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
}
end_id = ABPOA_SINK_NODE_ID; end_qpos = seq_lens[i];

#ifdef __DEBUG__
fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
#endif
if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "\tanchor: t: %d (id: %d), q: %d\n", end_tpos, end_id, end_qpos);
res.graph_cigar = 0; res.n_cigar = 0;
abpoa_align_sequence_to_subgraph(ab, abpt, beg_id, end_id, qseq+beg_qpos, end_qpos-beg_qpos, &res);
abpoa_push_whole_cigar(&whole_res.n_cigar, &whole_res.m_cigar, &whole_res.graph_cigar, res.n_cigar, res.graph_cigar);
Expand Down Expand Up @@ -307,9 +300,7 @@ int abpoa_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weights, in
// uint8_t *seq1;
for (i = 0; i < n_seq; ++i) {
qlen = seq_lens[i]; qseq = seqs[i]; weight = weights[i]; read_id = exist_n_seq + i;
#ifdef __DEBUG__
fprintf(stderr, "seq: # %d\n", i);
#endif
if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
res.graph_cigar = 0; res.n_cigar = 0;
if (abpoa_align_sequence_to_graph(ab, abpt, qseq, qlen, &res) >= 0) {
if (abpt->amb_strand && (res.best_score < MIN_OF_TWO(qlen, ab->abg->node_n-2) * abpt->max_mat * .3333)) { // TODO .3333
Expand Down
49 changes: 25 additions & 24 deletions src/abpoa_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ int reassign_hap(int **clu_haps, int *clu_size, uint64_t **clu_read_ids, int rea

// read_weight is NOT used here, no matter use_qv is set or not.
// collect minimized set of het bases
int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids, int *het_poss, int **rc_weight, int msa_l, int n_seq, int m, int min_w, int read_ids_n) {
int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids, int *het_poss, int **rc_weight, int msa_l, int n_seq, int m, int min_w, int read_ids_n, int verbose) {
int i, j, k, n, rank;
uint64_t b, one = 1, *whole_read_ids = (uint64_t*)_err_calloc(read_ids_n, sizeof(uint64_t));
for (i = 0; i < n_seq; ++i) {
Expand Down Expand Up @@ -782,12 +782,12 @@ int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids
if (iden == 1) continue;

het_poss[n_het_pos++] = rank;
#ifdef __DEBUG__
fprintf(stderr, "%d\t", rank);
for (j = 0; j < m; ++j) {
fprintf(stderr, "%c: %d\t", "ACGT-"[j], rc_weight[rank][j]);
} fprintf(stderr, "\n");
#endif
if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
fprintf(stderr, "%d\t", rank);
for (j = 0; j < m; ++j) {
fprintf(stderr, "%c: %d\t", "ACGT-"[j], rc_weight[rank][j]);
} fprintf(stderr, "\n");
}
}
}
free(n_branch); free(node_map);
Expand All @@ -796,7 +796,8 @@ int abpoa_set_het_row_column_ids_weight(abpoa_graph_t *abg, uint64_t ***read_ids

// group read into clusters based on all het bases
// initial cluster size could be > max_n_cons
int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***read_ids, int read_ids_n, int n_seq, int m, int min_w, int max_n_cons, uint64_t ***clu_read_ids, int *_m_clu) {
int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***read_ids, int read_ids_n,
int n_seq, int m, int min_w, int max_n_cons, uint64_t ***clu_read_ids, int *_m_clu, int verbose) {
if (n_het_pos == 0) return 1;
int i, j, k, n_clu = 0, m_clu = 2;
int **clu_haps = (int**)_err_malloc(2 * sizeof(int*));
Expand Down Expand Up @@ -832,33 +833,33 @@ int abpoa_collect_clu_hap_read_ids(int *het_poss, int n_het_pos, uint64_t ***rea
}
}
if (n_clu < 2) err_fatal(__func__, "# haplotypes: %d\n", n_clu);
#ifdef __DEBUG__
fprintf(stderr, "n_clu: %d\n", n_clu);
for (i = 0; i < n_clu; ++i) {
for (j = 0; j < n_het_pos; ++j) {
fprintf(stderr, "%d\t", clu_haps[i][j]);
if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
fprintf(stderr, "n_clu: %d\n", n_clu);
for (i = 0; i < n_clu; ++i) {
for (j = 0; j < n_het_pos; ++j) {
fprintf(stderr, "%d\t", clu_haps[i][j]);
}
fprintf(stderr, "\tsize: %d\n", clu_size[i]);
}
fprintf(stderr, "\tsize: %d\n", clu_size[i]);
}
#endif

// assign haplotype with reads < min_w to haplotype with reads >= min_w
// keep at most _max_n_cons_ haps and read ids, weight need to >= min_w
n_clu = reassign_hap(clu_haps, clu_size, *clu_read_ids, read_ids_n, n_clu, min_w, max_n_cons, n_het_pos);
#ifdef __DEBUG__
fprintf(stderr, "After re-assign: n_clu: %d\n", n_clu);
for (i = 0; i < n_clu; ++i) {
fprintf(stderr, "%d:\tsize: %d\n", i, clu_size[i]);
if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
fprintf(stderr, "After re-assign: n_clu: %d\n", n_clu);
for (i = 0; i < n_clu; ++i) {
fprintf(stderr, "%d:\tsize: %d\n", i, clu_size[i]);
}
}
#endif
for (i = 0; i < m_clu; ++i) free(clu_haps[i]); free(clu_haps); free(clu_size);
*_m_clu = m_clu;
return n_clu;
}

// read_weight is NOT used here
// cluster reads into _n_clu_ groups based on heterogeneous bases
int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq, int m, int max_n_cons, double min_freq, uint64_t ***clu_read_ids, int *_m_clu) {
int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq, int m, int max_n_cons, double min_freq, uint64_t ***clu_read_ids, int *_m_clu, int verbose) {
abpoa_set_msa_rank(abg, src_id, sink_id);
int i, j, n_clu, m_clu, read_ids_n = (n_seq-1)/64+1;
int msa_l = abg->node_id_to_msa_rank[sink_id]-1, min_w = MAX_OF_TWO(1, n_seq * min_freq); // TODO fastq-qual weight
Expand All @@ -878,11 +879,11 @@ int abpoa_multip_read_clu(abpoa_graph_t *abg, int src_id, int sink_id, int n_seq
}
// find min set of het nodes
int *het_poss = (int*)_err_calloc(msa_l, sizeof(int));
int n_het_pos = abpoa_set_het_row_column_ids_weight(abg, read_ids, het_poss, rc_weight, msa_l, n_seq, m, min_w, read_ids_n);
int n_het_pos = abpoa_set_het_row_column_ids_weight(abg, read_ids, het_poss, rc_weight, msa_l, n_seq, m, min_w, read_ids_n, verbose);

if (n_het_pos < 1) n_clu = 1;
// collect at most _max_n_cons_ haplotypes and corresponding read ids
else n_clu = abpoa_collect_clu_hap_read_ids(het_poss, n_het_pos, read_ids, read_ids_n, n_seq, m, min_w, max_n_cons, clu_read_ids, &m_clu);
else n_clu = abpoa_collect_clu_hap_read_ids(het_poss, n_het_pos, read_ids, read_ids_n, n_seq, m, min_w, max_n_cons, clu_read_ids, &m_clu, verbose);

for (i = 0; i < msa_l; ++i) {
for (j = 0; j < m; ++j) free(read_ids[i][j]);
Expand All @@ -906,7 +907,7 @@ void abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt) {
int n_clu, m_clu, n_seq = ab->abs->n_seq; uint64_t **clu_read_ids;
int read_ids_n = (n_seq-1)/64+1;

if (abpt->max_n_cons > 1) n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu);
if (abpt->max_n_cons > 1) n_clu = abpoa_multip_read_clu(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID, n_seq, abpt->m, abpt->max_n_cons, abpt->min_freq, &clu_read_ids, &m_clu, abpt->verbose);
else n_clu = 1;

abpoa_cons_t *abc = ab->abc;
Expand Down
60 changes: 29 additions & 31 deletions src/abpoa_seed.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ void mm_aa_sketch(void *km, const uint8_t *str, int len, int w, int k, uint32_t
int abpoa_build_guide_tree(abpoa_para_t *abpt, int n_seq, ab_u128_v *mm, int *tree_id_map) {
if (mm->n == 0) return 0;

if (abpt->verbose > 0) fprintf(stderr, "[%s] Building progressive guide tree ... ", __func__);
if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "[%s] Building progressive guide tree ... ", __func__);
size_t i, _i, j; int rid1, rid2; // mm_hit_n: mimizer hits between each two sequences
// 0: 0
// 1: 0 1
Expand Down Expand Up @@ -319,7 +319,7 @@ int abpoa_build_guide_tree(abpoa_para_t *abpt, int n_seq, ab_u128_v *mm, int *tr
}

free(mm_hit_n); free(jac_sim);
if (abpt->verbose > 0) fprintf(stderr, "done!\n");
if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "done!\n");
return 0;
}

Expand Down Expand Up @@ -382,7 +382,9 @@ int get_local_chain_score(int j_end_tpos, int j_end_qpos, int i_end_anchor_i, ab
// local chains:
// x: strand | end_tpos | end_qpos
// y: end_anchor_i | start_anchor_i
int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_local_chains, ab_u64_v *anchors, int *score, int *pre_id, ab_u64_v *par_anchors, int min_w, int tlen, int qlen) {
int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_local_chains,
ab_u64_v *anchors, int *score, int *pre_id, ab_u64_v *par_anchors,
int min_w, int tlen, int qlen, int verbose) {
int i, j, st, score1, global_max_score=INT32_MIN, global_max_i=-1;
int *chain_score = (int*)kmalloc(km, n_local_chains * 4), *pre_chain_id = (int*)kmalloc(km, n_local_chains * 4);
size_t _n = par_anchors->n;
Expand Down Expand Up @@ -452,13 +454,13 @@ int abpoa_dp_chaining_of_local_chains(void *km, ab_u128_t *local_chains, int n_l
par_anchors->a[par_anchors->n-i-1] = tmp;
}

#ifdef __DEBUG__
for (i = _n; i < par_anchors->n; ++i) {
uint64_t ia = par_anchors->a[i];
// strand, rpos, qpos
fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
for (i = _n; i < par_anchors->n; ++i) {
uint64_t ia = par_anchors->a[i];
// strand, rpos, qpos
fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
}
}
#endif
kfree(km, chain_score), kfree(km, pre_chain_id);
return 0;
}
Expand All @@ -482,7 +484,7 @@ static int get_chain_score(int max_bw, int *score, int i_qpos, int i_tpos, int j
// Dynamic Programming-based Chaining for global alignment mode
// anchors:
// strand<<63 | tpos<<32 | qpos
int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_para_t *abpt, int tlen, int qlen) {
int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_para_t *abpt, int tlen, int qlen, int verbose) {
int i, j, st, n_a = anchors->n;
int *score = (int32_t*)kmalloc(km, n_a * 4), *pre_id = (int32_t*)kmalloc(km, n_a * 4), *end_pos = (int32_t*)kmalloc(km, n_a * 4);
memset(end_pos, 0, n_a * 4);
Expand Down Expand Up @@ -516,9 +518,7 @@ int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_

if (pre_id[j] >= 0) end_pos[pre_id[j]] = i;
}
#ifdef __DEBUG__
fprintf(stderr, "%d pre_id: %d, score: %d, tpos: %d, qpos: %d\n", i, max_j, max_score, i_tpos, i_qpos);
#endif
if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) fprintf(stderr, "%d pre_id: %d, score: %d, tpos: %d, qpos: %d\n", i, max_j, max_score, i_tpos, i_qpos);
score[i] = max_score, pre_id[i] = max_j;
}

Expand Down Expand Up @@ -570,7 +570,8 @@ int abpoa_dp_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, abpoa_
}

radix_sort_ab_128x(local_chains+tot_chain_i+1, local_chains + n_local_chains);
abpoa_dp_chaining_of_local_chains(km, local_chains+tot_chain_i+1, n_local_chains-1-tot_chain_i, anchors, score, pre_id, par_anchors, min_w, tlen, qlen);
abpoa_dp_chaining_of_local_chains(km, local_chains+tot_chain_i+1, n_local_chains-1-tot_chain_i,
anchors, score, pre_id, par_anchors, min_w, tlen, qlen, abpt->verbose);

kfree(km, score); kfree(km, pre_id); kfree(km, end_pos); kfree(km, local_chains);
return 0;
Expand Down Expand Up @@ -630,7 +631,7 @@ int LIS(void *km, int tot_n, uint64_t *rank, int n) {
// output:
// anchor list size: n
// list of anchors: anchors
int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w) {
int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w, int verbose) {
size_t i, j, n_a = anchors->n, n_for, n_rev;
uint64_t *for_rank = (uint64_t*)kmalloc(km, sizeof(uint64_t) * n_a);
uint64_t *rev_rank = (uint64_t*)kmalloc(km, sizeof(uint64_t) * n_a);
Expand Down Expand Up @@ -663,10 +664,8 @@ int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w)
n = n_rev; rank = rev_rank; kfree(km, for_rank);
}
// filter anchors
int last_tpos = -1, last_qpos = -1, cur_tpos, cur_qpos;
#ifdef __DEBUG__
size_t _n = par_anchors->n;
#endif
int last_tpos = -1, last_qpos = -1, cur_tpos, cur_qpos; size_t _n = 0;
if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) _n = par_anchors->n;
for (i = 0; i < n; ++i) {
j = (int)rank[i]-1;
cur_tpos = (anchors->a[j] >> 32) & 0x7fffffff;
Expand All @@ -677,26 +676,27 @@ int LIS_chaining(void *km, ab_u64_v *anchors, ab_u64_v *par_anchors, int min_w)
kv_push(uint64_t, 0, *par_anchors, anchors->a[j]); // store LIS anchors into par_anchors
last_tpos = cur_tpos; last_qpos = cur_qpos;
}
#ifdef __DEBUG__
for (i = _n; i < par_anchors->n; ++i) {
uint64_t ia = par_anchors->a[i];
// strand, rpos, qpos
fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));

if (verbose >= ABPOA_LONG_DEBUG_VERBOSE) {
for (i = _n; i < par_anchors->n; ++i) {
uint64_t ia = par_anchors->a[i];
// strand, rpos, qpos
fprintf(stderr, "%c\t%ld\t%d\n", "+-"[ia >> 63], (ia>>32) & 0x7fffffff, ((uint32_t)ia));
}
}
#endif
return 0;
}

int abpoa_collect_mm(void *km, uint8_t **seqs, int *seq_lens, int n_seq, abpoa_para_t *abpt, ab_u128_v *mm, int *mm_c) {
if (abpt->verbose > 0) fprintf(stderr, "[%s] Collecting minimizers ... ", __func__);
if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "[%s] Collecting minimizers ... ", __func__);
int i;
mm_c[0] = 0;
for (i = 0; i < n_seq; ++i) { // collect minimizers
if (abpt->m > 5) mm_aa_sketch(km, seqs[i], seq_lens[i], abpt->w, abpt->k, i, 0, mm);
else mm_sketch(km, seqs[i], seq_lens[i], abpt->w, abpt->k, i, 0, abpt->amb_strand, mm);
mm_c[i+1] = mm->n;
}
if (abpt->verbose > 0) fprintf(stderr, "done!\n");
if (abpt->verbose >= ABPOA_INFO_VERBOSE) fprintf(stderr, "done!\n");
return mm->n;
}

Expand Down Expand Up @@ -731,11 +731,9 @@ int abpoa_build_guide_tree_partition(uint8_t **seqs, int *seq_lens, int n_seq, a
// collect minimizer hit anchors between t and q
collect_anchors1(km, &anchors, mm1, mm_c, tid, qid, seq_lens[qid], abpt->k);
// filtering and only keep LIS anchors
#ifdef __DEBUG__
fprintf(stderr, "%d vs %d (tot_n: %ld)\n", tid, qid, anchors.n);
#endif
if (abpt->verbose >= ABPOA_LONG_DEBUG_VERBOSE) fprintf(stderr, "%d vs %d (tot_n: %ld)\n", tid, qid, anchors.n);
// alignment mode: different chaining result for global/local/extend
abpoa_dp_chaining(km, &anchors, par_anchors, abpt, seq_lens[tid], seq_lens[qid]);
abpoa_dp_chaining(km, &anchors, par_anchors, abpt, seq_lens[tid], seq_lens[qid], abpt->verbose);
par_c[i] = par_anchors->n;
kfree(km, anchors.a);
}
Expand Down
Loading

0 comments on commit e234dce

Please sign in to comment.