Skip to content

Commit

Permalink
use R instead of R_max/min; fix a bug when sequence length diff too much
Browse files Browse the repository at this point in the history
  • Loading branch information
Yan Gao committed May 2, 2020
1 parent 4272e8e commit d72b804
Show file tree
Hide file tree
Showing 14 changed files with 86 additions and 76 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,6 @@ python/src/*
# eval file
evaluation/msa_abPOA
evaluation/msa_spoa
evaluation/racon_abPOA
evaluation/racon_spoa

6 changes: 3 additions & 3 deletions evaluation/msa_abPOA.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* To compile:
* gcc -O3 msa_abPOA.c -I ./include -L ./lib -labpoa -lz -o msa_abPOA
gcc -O3 msa_abPOA.c -I ../include -L ../lib -labpoa -lz -o msa_abPOA
*/
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -125,7 +125,7 @@ int main (int argc, char * const argv[]) {
struct timeval start_time, end_time;
double runtime = 0;

int i;
int i,j = 0;
abpoa_t *ab = abpoa_init();
while(1) {
int seq_i = 0;
Expand Down Expand Up @@ -156,7 +156,7 @@ int main (int argc, char * const argv[]) {
if(feof(fp_seq)) break;
}
abpoa_free(ab, abpt);
fprintf(stderr, "%.2f\t", runtime);
fprintf(stderr, "%.2f ", runtime);

free(seq_lens); free(bseqs);
abpoa_free_para(abpt); fclose(fp_seq);
Expand Down
2 changes: 1 addition & 1 deletion evaluation/msa_spoa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ int main(int argc, char** argv) {
runtime += (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_usec - start_time.tv_usec) * 1e-6;

}
fprintf(stderr, "%.2f\t", runtime);
fprintf(stderr, "%.2f ", runtime);

fp_seq.close();
return 0;
Expand Down
4 changes: 2 additions & 2 deletions evaluation/racon_abPOA.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* To compile:
* gcc -O3 racon_abPOA.c -I ../include -L ../lib -labpoa -lz -o racon_abPOA
gcc -O3 racon_abPOA.c -I ../include -L ../lib -labpoa -lz -o racon_abPOA
*/
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -166,7 +166,7 @@ int main (int argc, char * const argv[]) {
}
}
abpoa_free(ab, abpt);
fprintf(stderr, "%.2f\t", runtime);
fprintf(stderr, "%.2f ", runtime);

free(seq_lens); free(bseqs);
abpoa_free_para(abpt); fclose(fp_seq);
Expand Down
6 changes: 1 addition & 5 deletions evaluation/racon_spoa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,8 @@ int main(int argc, char** argv) {
runtime += (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_usec - start_time.tv_usec) * 1e-6;
seq_i = 1;
}




}
fprintf(stderr, "%.2f\t", runtime);
fprintf(stderr, "%.2f ", runtime);

fp_seq.close();
return 0;
Expand Down
2 changes: 1 addition & 1 deletion include/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ typedef struct {
typedef struct {
abpoa_node_t *node; int node_n, node_m, index_rank_m;
int *index_to_node_id;
int *node_id_to_index, *node_id_to_max_pos_left, *node_id_to_max_pos_right, *node_id_to_min_remain, *node_id_to_max_remain, *node_id_to_msa_rank;
int *node_id_to_index, *node_id_to_max_pos_left, *node_id_to_max_pos_right, *node_id_to_max_remain, *node_id_to_msa_rank;
int cons_l, cons_m; uint8_t *cons_seq;
uint8_t is_topological_sorted:1, is_called_cons:1, is_set_msa_rank:1;
} abpoa_graph_t;
Expand Down
1 change: 0 additions & 1 deletion python/cabpoa.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ cdef extern from "abpoa.h":
int *node_id_to_index
int *node_id_to_min_rank
int *node_id_to_max_rank
int *node_id_to_min_remain
int *node_id_to_max_remain
int *node_id_to_msa_rank
int cons_l, cons_m
Expand Down
2 changes: 2 additions & 0 deletions python/pyabpoa.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ cdef class msa_aligner:
self.nt4_seq_dict[1] = 'C'
self.nt4_seq_dict[2] = 'G'
self.nt4_seq_dict[3] = 'T'
self.nt4_seq_dict[4] = 'N'

def __dealloc__(self):
free(self.abpt.mat)
Expand Down Expand Up @@ -137,6 +138,7 @@ cdef class msa_aligner:

abpoa_reset_graph(self.ab, &self.abpt, len(seqs[0]))
for read_i, seq in enumerate(seqs):
print(read_i)
seq_l = len(seq)
bseq = <uint8_t*>malloc(seq_l * cython.sizeof(uint8_t))
for i in range(seq_l):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
description = "pyabpoa: SIMD-based partial order alignment using adaptive band",
long_description = long_description,
long_description_content_type="text/markdown",
version = "1.0.0a1",
version = "1.0.1",
url = "https://github.com/yangao07/abPOA",
author = "Yan Gao",
author_email = "[email protected]",
Expand Down
2 changes: 1 addition & 1 deletion src/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ typedef struct {
typedef struct {
abpoa_node_t *node; int node_n, node_m, index_rank_m;
int *index_to_node_id;
int *node_id_to_index, *node_id_to_max_pos_left, *node_id_to_max_pos_right, *node_id_to_min_remain, *node_id_to_max_remain, *node_id_to_msa_rank;
int *node_id_to_index, *node_id_to_max_pos_left, *node_id_to_max_pos_right, *node_id_to_max_remain, *node_id_to_msa_rank;
int cons_l, cons_m; uint8_t *cons_seq;
uint8_t is_topological_sorted:1, is_called_cons:1, is_set_msa_rank:1;
} abpoa_graph_t;
Expand Down
6 changes: 3 additions & 3 deletions src/abpoa_align.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
#define DIPLOID_MIN_FREQ 0.3

// start and end of each band:
// range: (min_of_two(max_left, qlen-max_remain), max_of_two(max_right, qlen-min_remain))
// range: (min_of_two(max_left, qlen-remain), max_of_two(max_right, qlen-remain))
// with extra band width: (range_min-w, range_max+w)
#define GET_AD_DP_BEGIN(graph, w, i, qlen) MAX_OF_TWO(0, MIN_OF_TWO(abpoa_graph_node_id_to_max_pos_left(graph, i), qlen - abpoa_graph_node_id_to_max_remain(graph, i))-w)
#define GET_AD_DP_END(graph, w, i, qlen) MIN_OF_TWO(qlen, MAX_OF_TWO(abpoa_graph_node_id_to_max_pos_right(graph, i), qlen - abpoa_graph_node_id_to_min_remain(graph, i))+w)
#define GET_AD_DP_BEGIN(graph, w, i, qlen) MAX_OF_TWO(0, MIN_OF_TWO(abpoa_graph_node_id_to_max_pos_left(graph, i), qlen - abpoa_graph_node_id_to_max_remain(graph, i))-w)
#define GET_AD_DP_END(graph, w, i, qlen) MIN_OF_TWO(qlen, MAX_OF_TWO(abpoa_graph_node_id_to_max_pos_right(graph, i), qlen - abpoa_graph_node_id_to_max_remain(graph, i))+w)

#ifdef __cplusplus
extern "C" {
Expand Down
34 changes: 19 additions & 15 deletions src/abpoa_graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ abpoa_graph_t *abpoa_init_graph(void) {
graph->cons_l = graph->cons_m = 0; graph->cons_seq = NULL;
graph->is_topological_sorted = graph->is_called_cons = 0;
graph->node_id_to_index = NULL; graph->index_to_node_id = NULL; graph->node_id_to_msa_rank = NULL;
graph->node_id_to_max_pos_left = NULL; graph->node_id_to_max_pos_right = NULL; graph->node_id_to_min_remain = NULL; graph->node_id_to_max_remain = NULL;
graph->node_id_to_max_pos_left = NULL; graph->node_id_to_max_pos_right = NULL; graph->node_id_to_max_remain = NULL;
return graph;
}

Expand All @@ -126,7 +126,6 @@ void abpoa_free_graph(abpoa_graph_t *graph, abpoa_para_t *abpt) {

if (graph->node_id_to_max_pos_left) free(graph->node_id_to_max_pos_left);
if (graph->node_id_to_max_pos_right) free(graph->node_id_to_max_pos_right);
if (graph->node_id_to_min_remain) free(graph->node_id_to_min_remain);
if (graph->node_id_to_max_remain) free(graph->node_id_to_max_remain);
}
free(graph);
Expand Down Expand Up @@ -192,39 +191,48 @@ next_out_node:;
}

void abpoa_BFS_set_node_remain(abpoa_graph_t *graph, int src_id, int sink_id) {
int *id, cur_id, i, in_id;
int *id, cur_id, i, out_id, in_id;

int *out_degree = (int*)_err_malloc(graph->node_n * sizeof(int));
for (i = 0; i < graph->node_n; ++i) {
out_degree[i] = graph->node[i].out_edge_n;
graph->node_id_to_max_remain[i] = 0;
graph->node_id_to_min_remain[i] = graph->node_n;
}

kdq_int_t *q = kdq_init_int();

// Breadth-First-Search
kdq_push_int(q, sink_id); // node[q.id].in_degree equals 0
graph->node_id_to_max_remain[sink_id] = graph->node_id_to_min_remain[sink_id] = -1; // XXX not 0
graph->node_id_to_max_remain[sink_id] = -1; // XXX not 0
while ((id = kdq_shift_int(q)) != 0) {
cur_id = *id;

// all out_id of cur_id have beed visited
// max weight out_id
if (cur_id != sink_id) {
int max_w=-1, max_id=-1;
for (i = 0; i < graph->node[cur_id].out_edge_n; ++i) {
out_id = graph->node[cur_id].out_id[i];
if (graph->node[cur_id].out_weight[i] > max_w) {
max_w = graph->node[cur_id].out_weight[i];
max_id = out_id;
}
}
graph->node_id_to_max_remain[cur_id] = graph->node_id_to_max_remain[max_id] + 1;
// fprintf(stderr, "%d -> %d\n", graph->node_id_to_index[cur_id], graph->node_id_to_max_remain[cur_id]);
}
if (cur_id == src_id) {
kdq_destroy_int(q); free(out_degree);
return;
}
for (i = 0; i < graph->node[cur_id].in_edge_n; ++i) {
in_id = graph->node[cur_id].in_id[i];
int max_remain = graph->node_id_to_max_remain[cur_id] + 1, min_remain = graph->node_id_to_min_remain[cur_id] + 1;

if (max_remain > graph->node_id_to_max_remain[in_id]) graph->node_id_to_max_remain[in_id] = max_remain;
if (min_remain < graph->node_id_to_min_remain[in_id]) graph->node_id_to_min_remain[in_id] = min_remain;

if (--out_degree[in_id] == 0) kdq_push_int(q, in_id);
}
}
err_fatal_simple("Failed to set node remain.");
}

// 1. index_to_node_id
// 2. node_id_to_index
// 3. node_id_to_rank
Expand All @@ -245,10 +253,8 @@ void abpoa_topological_sort(abpoa_t *ab, abpoa_para_t *abpt) {
if (abpt->wb >= 0) {
graph->node_id_to_max_pos_left = (int*)_err_realloc(graph->node_id_to_max_pos_left, graph->index_rank_m * sizeof(int));
graph->node_id_to_max_pos_right = (int*)_err_realloc(graph->node_id_to_max_pos_right, graph->index_rank_m * sizeof(int));
graph->node_id_to_min_remain = (int*)_err_realloc(graph->node_id_to_min_remain, graph->index_rank_m * sizeof(int));
graph->node_id_to_max_remain = (int*)_err_realloc(graph->node_id_to_max_remain, graph->index_rank_m * sizeof(int));
} else if (abpt->zdrop > 0) {
graph->node_id_to_min_remain = (int*)_err_realloc(graph->node_id_to_min_remain, graph->index_rank_m * sizeof(int));
graph->node_id_to_max_remain = (int*)_err_realloc(graph->node_id_to_max_remain, graph->index_rank_m * sizeof(int));
}
}
Expand Down Expand Up @@ -997,7 +1003,7 @@ int abpoa_add_graph_alignment(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *seq, int

// reset allocated memery everytime init the graph
// * node
// * index_to_node_id/node_id_to_index/node_id_to_max/min_remain, max_pos_left/right
// * index_to_node_id/node_id_to_index/node_id_to_max_remain, max_pos_left/right
void abpoa_reset_graph(abpoa_t *ab, abpoa_para_t *abpt, int qlen) {
int i, k, node_m;
ab->abg->cons_l = 0; ab->abg->is_topological_sorted = ab->abg->is_called_cons = 0;
Expand All @@ -1022,10 +1028,8 @@ void abpoa_reset_graph(abpoa_t *ab, abpoa_para_t *abpt, int qlen) {
if (abpt->wb >= 0) {
ab->abg->node_id_to_max_pos_left = (int*)_err_realloc(ab->abg->node_id_to_max_pos_left, node_m * sizeof(int));
ab->abg->node_id_to_max_pos_right = (int*)_err_realloc(ab->abg->node_id_to_max_pos_right, node_m * sizeof(int));
ab->abg->node_id_to_min_remain = (int*)_err_realloc(ab->abg->node_id_to_min_remain, node_m * sizeof(int));
ab->abg->node_id_to_max_remain = (int*)_err_realloc(ab->abg->node_id_to_max_remain, node_m * sizeof(int));
} else if (abpt->zdrop > 0) {
ab->abg->node_id_to_min_remain = (int*)_err_realloc(ab->abg->node_id_to_min_remain, node_m * sizeof(int));
ab->abg->node_id_to_max_remain = (int*)_err_realloc(ab->abg->node_id_to_max_remain, node_m * sizeof(int));
}
}
Expand Down
5 changes: 0 additions & 5 deletions src/abpoa_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ static inline int abpoa_graph_node_id_to_max_remain(abpoa_graph_t *graph, int no
return graph->node_id_to_max_remain[node_id];
}

static inline int abpoa_graph_node_id_to_min_remain(abpoa_graph_t *graph, int node_id) {
if (node_id < 0 || node_id >= graph->node_n) err_fatal(__func__, "Wrong node id: %d\n", node_id);
return graph->node_id_to_min_remain[node_id];
}

static inline int abpoa_graph_index_to_node_id(abpoa_graph_t *graph, int index_i) {
if (index_i < 0 || index_i >= graph->node_n) err_fatal(__func__, "Wrong index: %d\n", index_i);
return graph->index_to_node_id[index_i];
Expand Down
Loading

0 comments on commit d72b804

Please sign in to comment.