Skip to content

Commit

Permalink
sort in/out ids
Browse files Browse the repository at this point in the history
  • Loading branch information
yangao07 committed Nov 26, 2024
1 parent b0276ea commit 7206d5b
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/abpoa.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ int main(int argc, char **argv) {
case 'E': abpt->gap_ext1 = strtol(optarg, &s, 10); if (*s == ',') abpt->gap_ext2 = strtol(s+1, &s, 10); break;

case 'G': abpt->inc_path_score = 1; break;
case 'L': abpt->sort_input_seq_by_len = 1; break;
case 'L': abpt->sort_input_seq = 1; break;
case 'R': abpt->put_gap_on_right = 1; break;
case 'b': abpt->wb = atoi(optarg); break;
case 'f': abpt->wf = atof(optarg); break;
Expand Down
2 changes: 1 addition & 1 deletion src/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ typedef struct {
int m; int *mat; char *mat_fn; // score matrix
int use_score_matrix; // set _mat_ based on score matrix file, then _match_/_mismatch_ is not used.
int match, max_mat, mismatch, min_mis, gap_open1, gap_open2, gap_ext1, gap_ext2; int inf_min;
int sort_input_seq_by_len; // sort input sequences by length, in descending order
int sort_input_seq; // sort input sequences by length, in descending order
int put_gap_on_right; // put indel on the left-most side of the alignment: minimap2-like, or right-most: wfa2-like
int inc_path_score; // set mismatch/match score based on node weight, i.e., # covered reads * match/mismatch
// minimizer seeding parameter
Expand Down
5 changes: 3 additions & 2 deletions src/abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ abpoa_para_t *abpoa_init_para(void) {
abpt->end_bonus = -1; // disable end bonus

abpt->inc_path_score = 0;
abpt->sort_input_seq_by_len = 0;
abpt->sort_input_seq = 0;
abpt->put_gap_on_right = 0;

abpt->wb = ABPOA_EXTRA_B; // extra bandwidth
Expand Down Expand Up @@ -205,6 +205,7 @@ int abpoa_anchor_poa(abpoa_t *ab, abpoa_para_t *abpt, uint8_t **seqs, int **weig
// uint8_t *seq1;
for (_i = 0; _i < n_seq; ++_i) {
i = read_id_map[_i]; read_id = exist_n_seq + i; qlen = seq_lens[i]; whole_res.n_cigar = 0, whole_res.m_cigar = 0, whole_res.graph_cigar = 0;
fprintf(stderr, "seq: # %d\n", i);
if (abpt->verbose >= ABPOA_DEBUG_VERBOSE) fprintf(stderr, "seq: # %d\n", i);
// seq-to-graph alignment and add alignment within each split window
if (_i == 0) ai = 0; else ai = par_c[_i-1];
Expand Down Expand Up @@ -463,7 +464,7 @@ int abpoa_msa1(abpoa_t *ab, abpoa_para_t *abpt, char *read_fn, FILE *out_fp) {
gzFile readfp = xzopen(read_fn, "r"); kseq_t *ks = kseq_init(readfp);
int i, j, n_seq = abpoa_read_seq(abs, ks);

if (abpt->sort_input_seq_by_len) abpoa_sort_seq_by_length(abs, exist_n_seq, n_seq);
if (abpt->sort_input_seq) abpoa_sort_seq_by_length(abs, exist_n_seq, n_seq);

// always reset graph before perform POA
int max_len = 0;
Expand Down
28 changes: 28 additions & 0 deletions src/abpoa_graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,33 @@ void abpoa_free(abpoa_t *ab) {
free(ab);
}

// sort in_id/out_id by edge weight in descending order
void abpoa_sort_in_out_ids(abpoa_graph_t *abg) {
int i, j, k;
int tmp;
for (i = 0; i < abg->node_n; ++i) {
// in_id
for (j = 0; j < abg->node[i].in_edge_n-1; ++j) {
for (k = j+1; k < abg->node[i].in_edge_n; ++k) {
if (abg->node[i].in_edge_weight[j] < abg->node[i].in_edge_weight[k]) {
tmp = abg->node[i].in_id[j]; abg->node[i].in_id[j] = abg->node[i].in_id[k]; abg->node[i].in_id[k] = tmp;
tmp = abg->node[i].in_edge_weight[j]; abg->node[i].in_edge_weight[j] = abg->node[i].in_edge_weight[k]; abg->node[i].in_edge_weight[k] = tmp;
}
}
}
// out_id
for (j = 0; j < abg->node[i].out_edge_n-1; ++j) {
for (k = j+1; k < abg->node[i].out_edge_n; ++k) {
if (abg->node[i].out_edge_weight[j] < abg->node[i].out_edge_weight[k]) {
tmp = abg->node[i].out_id[j]; abg->node[i].out_id[j] = abg->node[i].out_id[k]; abg->node[i].out_id[k] = tmp;
tmp = abg->node[i].out_edge_weight[j]; abg->node[i].out_edge_weight[j] = abg->node[i].out_edge_weight[k]; abg->node[i].out_edge_weight[k] = tmp;
}
}
}
}

}

void abpoa_BFS_set_node_index(abpoa_graph_t *abg, int src_id, int sink_id) {
int *id, cur_id, out_id, aligned_id;
int index = 0, q_size, new_q_size;
Expand Down Expand Up @@ -311,6 +338,7 @@ void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt) {
}
// start from ABPOA_SRC_NODE_ID to ABPOA_SINK_NODE_ID
abpoa_BFS_set_node_index(abg, ABPOA_SRC_NODE_ID, ABPOA_SINK_NODE_ID);
abpoa_sort_in_out_ids(abg);
// init min/max rank
if (abpt->wb >= 0) {
int i;
Expand Down
28 changes: 14 additions & 14 deletions src/simd_abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
/* first pre_node */ \
pre_i = pre_index[dp_i][0]; \
if (abpt->inc_path_score) path_score = abpoa_get_incre_path_score(graph, node_id, 0); \
SIMDi dp_path_score = SIMDSetOne(path_score); \
SIMDi dp_path_score = SIMDSetOne(path_score); \
pre_dp_h = DP_H + pre_i * dp_sn; \
pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i]; \
/* set M from (pre_i, q_i-1), E from (pre_i, q_i) */ \
Expand Down Expand Up @@ -843,7 +843,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
/* first pre_node */ \
pre_i = pre_index[dp_i][0]; \
if (abpt->inc_path_score) path_score = abpoa_get_incre_path_score(graph, node_id, 0); \
SIMDi dp_path_score = SIMDSetOne(path_score); \
SIMDi dp_path_score = SIMDSetOne(path_score); \
pre_dp_h = DP_HEF + pre_i * 3 * dp_sn; pre_dp_e1 = pre_dp_h + dp_sn; \
pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i]; pre_end_sn = dp_end_sn[pre_i]; \
/* set M from (pre_i, q_i-1) */ \
Expand All @@ -858,7 +858,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
} \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */ \
remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN); \
dp_h[sn_i] = SIMDAdd(SIMDOri(first, remain), dp_path_score); \
dp_h[sn_i] = SIMDAdd(SIMDOri(first, remain), dp_path_score); \
first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN); \
} \
/* set E from (pre_i, q_i) */ \
Expand All @@ -868,13 +868,13 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
for (i = _end_sn+1; i <= end_sn; ++i) dp_e1[i] = SIMD_INF_MIN; \
} \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) /* SIMD parallelization */ \
dp_e1[sn_i] = SIMDAdd(pre_dp_e1[sn_i], dp_path_score); \
dp_e1[sn_i] = SIMDAdd(pre_dp_e1[sn_i], dp_path_score); \
/* get max m and e */ \
for (i = 1; i < pre_n[dp_i]; ++i) { \
pre_i = pre_index[dp_i][i]; \
if (abpt->inc_path_score) { \
path_score = abpoa_get_incre_path_score(graph, node_id, i); \
dp_path_score = SIMDSetOne(path_score); \
dp_path_score = SIMDSetOne(path_score); \
} \
pre_dp_h = DP_HEF + pre_i * 3 * dp_sn; pre_dp_e1 = pre_dp_h + dp_sn; \
pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i]; pre_end_sn = dp_end_sn[pre_i]; \
Expand All @@ -888,7 +888,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
} \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */ \
remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN); \
dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), dp_path_score), dp_h[sn_i]); \
dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), dp_path_score), dp_h[sn_i]); \
first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN); \
} \
/* set E from (pre_i, q_i) */ \
Expand Down Expand Up @@ -968,7 +968,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
/* first pre_node */ \
pre_i = pre_index[dp_i][0]; \
if (abpt->inc_path_score) path_score = abpoa_get_incre_path_score(graph, node_id, 0); \
SIMDi dp_path_score = SIMDSetOne(path_score); \
SIMDi dp_path_score = SIMDSetOne(path_score); \
pre_dp_h = DP_H2E2F + pre_i * 5 * dp_sn; pre_dp_e1 = pre_dp_h + dp_sn; pre_dp_e2 = pre_dp_e1 + dp_sn; \
pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i]; pre_end_sn = dp_end_sn[pre_i]; \
/* set M from (pre_i, q_i-1) */ \
Expand All @@ -984,7 +984,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
/* fprintf(stderr, "1 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, _beg_sn, _end_sn); */ \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */ \
remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN); \
dp_h[sn_i] = SIMDAdd(SIMDOri(first, remain), dp_path_score); \
dp_h[sn_i] = SIMDAdd(SIMDOri(first, remain), dp_path_score); \
first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN); \
} \
/* set E from (pre_i, q_i) */ \
Expand All @@ -995,15 +995,15 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
} \
/* fprintf(stderr, "2 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, _beg_sn, _end_sn); */ \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */ \
dp_e1[sn_i] = SIMDAdd(pre_dp_e1[sn_i], dp_path_score); \
dp_e2[sn_i] = SIMDAdd(pre_dp_e2[sn_i], dp_path_score); \
dp_e1[sn_i] = SIMDAdd(pre_dp_e1[sn_i], dp_path_score); \
dp_e2[sn_i] = SIMDAdd(pre_dp_e2[sn_i], dp_path_score); \
} \
/* get max m and e */ \
for (i = 1; i < pre_n[dp_i]; ++i) { \
pre_i = pre_index[dp_i][i]; \
if (abpt->inc_path_score) { \
path_score = abpoa_get_incre_path_score(graph, node_id, i); \
dp_path_score = SIMDSetOne(path_score); \
dp_path_score = SIMDSetOne(path_score); \
} \
pre_dp_h = DP_H2E2F + (pre_i * 5) * dp_sn; pre_dp_e1 = pre_dp_h + dp_sn; pre_dp_e2 = pre_dp_e1 + dp_sn; \
pre_end = dp_end[pre_i]; pre_beg_sn = dp_beg_sn[pre_i]; pre_end_sn = dp_end_sn[pre_i]; \
Expand All @@ -1018,15 +1018,15 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
/* fprintf(stderr, "3 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, _beg_sn, _end_sn); */ \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */ \
remain = SIMDShiftLeft(pre_dp_h[sn_i], SIMDShiftOneN); \
dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), dp_path_score), dp_h[sn_i]); \
dp_h[sn_i] = SIMDMax(SIMDAdd(SIMDOri(first, remain), dp_path_score), dp_h[sn_i]); \
first = SIMDShiftRight(pre_dp_h[sn_i], SIMDTotalBytes-SIMDShiftOneN); \
} \
/* set E from (pre_i, q_i) */ \
_end_sn = MIN_OF_TWO(pre_end_sn, end_sn); \
/* fprintf(stderr, "4 index_i: %d, beg_sn: %d, end_sn: %d\n", index_i, _beg_sn, _end_sn); */ \
for (sn_i = _beg_sn; sn_i <= _end_sn; ++sn_i) { /* SIMD parallelization */ \
dp_e1[sn_i] = SIMDMax(SIMDAdd(pre_dp_e1[sn_i], dp_path_score), dp_e1[sn_i]); \
dp_e2[sn_i] = SIMDMax(SIMDAdd(pre_dp_e2[sn_i], dp_path_score), dp_e2[sn_i]); \
dp_e1[sn_i] = SIMDMax(SIMDAdd(pre_dp_e1[sn_i], dp_path_score), dp_e1[sn_i]); \
dp_e2[sn_i] = SIMDMax(SIMDAdd(pre_dp_e2[sn_i], dp_path_score), dp_e2[sn_i]); \
} \
} \
/* compare M, E, and F */ \
Expand Down

0 comments on commit 7206d5b

Please sign in to comment.