diff --git a/README.md b/README.md index 2d2c6a5..f29460e 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,17 @@ [![Build Status](https://img.shields.io/travis/yangao07/abPOA/master.svg?label=Master)](https://travis-ci.org/yangao07/abPOA) [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/master/LICENSE) +## Updates (v1.0.4) + +- Added read ID as head in MSA output: `-A` +- Added GFA output: `-r3`/`-r4` +- Added ambiguous strand mode: `-s` ## Getting started Download the [latest release](https://github.com/yangao07/abPOA/releases): ``` -wget https://github.com/yangao07/abPOA/releases/download/v1.0.3/abPOA-v1.0.3.tar.gz -tar -zxvf abPOA-v1.0.3.tar.gz && cd abPOA-v1.0.3 +wget https://github.com/yangao07/abPOA/releases/download/v1.0.4/abPOA-v1.0.4.tar.gz +tar -zxvf abPOA-v1.0.4.tar.gz && cd abPOA-v1.0.4 ``` Make from source and run with test data: ``` @@ -75,9 +80,9 @@ You can also build abPOA from source files. Make sure you have gcc (>=6.4.0) and zlib installed before compiling. It is recommended to download the [latest release](https://github.com/yangao07/abPOA/releases). ``` -wget https://github.com/yangao07/abPOA/releases/download/v1.0.3/abPOA-v1.0.3.tar.gz -tar -zxvf abPOA-v1.0.3.tar.gz -cd abPOA-v1.0.3; make +wget https://github.com/yangao07/abPOA/releases/download/v1.0.4/abPOA-v1.0.4.tar.gz +tar -zxvf abPOA-v1.0.4.tar.gz +cd abPOA-v1.0.4; make ``` Or, you can use `git clone` command to download the source code. This gives you the latest version of abPOA, which might be still under development. @@ -89,8 +94,8 @@ cd abPOA; make ### Pre-built binary executable file for Linux/Unix If you meet any compiling issue, please try the pre-built binary file: ``` -wget https://github.com/yangao07/abPOA/releases/download/v1.0.3/abPOA-v1.0.3_x64-linux.tar.gz -tar -zxvf abPOA-v1.0.3_x64-linux.tar.gz +wget https://github.com/yangao07/abPOA/releases/download/v1.0.4/abPOA-v1.0.4_x64-linux.tar.gz +tar -zxvf abPOA-v1.0.4_x64-linux.tar.gz ``` ## General usage diff --git a/python/cabpoa.pxd b/python/cabpoa.pxd index b52b615..57c4811 100644 --- a/python/cabpoa.pxd +++ b/python/cabpoa.pxd @@ -43,6 +43,8 @@ cdef extern from "abpoa.h": uint64_t *graph_cigar int node_s, node_e, query_s, query_e # for local and extension mode int n_aln_bases, n_matched_bases + uint32_t best_score + uint8_t is_rc ctypedef struct abpoa_para_t: @@ -56,13 +58,16 @@ cdef extern from "abpoa.h": int simd_flag # available SIMD instruction # alignment mode uint8_t ret_cigar, rev_cigar, out_msa, out_msa_header, out_cons, out_gfa, is_diploid, use_read_ids # mode: 0: global, 1: local, 2: extend + uint8_t amb_strand char *out_pog int align_mode, gap_mode, cons_agrm double min_freq # for diploid data + char LogTable65536[65536] + char bit_table16[65536] ctypedef struct abpoa_node_t: - int node_id, index, rank + int node_id int in_edge_n, in_edge_m int *in_id int out_edge_n, out_edge_m @@ -73,7 +78,6 @@ cdef extern from "abpoa.h": int read_ids_n # for diploid int aligned_node_n, aligned_node_m int *aligned_node_id # mismatch; aligned node will have same rank - int heaviest_weight, heaviest_out_id # for consensus uint8_t base # 0~m ctypedef struct abpoa_graph_t: @@ -81,11 +85,12 @@ cdef extern from "abpoa.h": int node_n, node_m, index_rank_m int *index_to_node_id int *node_id_to_index - int *node_id_to_min_rank - int *node_id_to_max_rank + int *node_id_to_max_pos_left + int *node_id_to_max_pos_right int *node_id_to_max_remain int *node_id_to_msa_rank uint8_t is_topological_sorted, is_called_cons, is_set_msa_rank + double cal_R_time ctypedef struct abpoa_simd_matrix_t: pass @@ -105,7 +110,7 @@ cdef extern from "abpoa.h": void abpoa_free(abpoa_t *ab, abpoa_para_t *abpt) # do msa for a set of input sequences - int abpoa_msa(abpoa_t *ab, abpoa_para_t *abpt, int n_seqs, char **seq_names, int *seq_lens, uint8_t **seqs, FILE *out_fp, uint8_t ***cons_seq, int **cons_l, int *cons_n, uint8_t ***msa_seq, int *msa_l) + int abpoa_msa(abpoa_t *ab, abpoa_para_t *abpt, int n_seqs, char **seq_names, int *seq_lens, uint8_t **seqs, FILE *out_fp, uint8_t ***cons_seq, uint8_t ***cons_cov, int **cons_l, int *cons_n, uint8_t ***msa_seq, int *msa_l) # clean alignment graph void abpoa_reset_graph(abpoa_t *ab, abpoa_para_t *abpt, int qlen) @@ -115,8 +120,8 @@ cdef extern from "abpoa.h": # add an alignment to a graph int abpoa_add_graph_node(abpoa_graph_t *abg, uint8_t base) - void abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_edge, uint8_t add_read_id, int read_id, int read_ids_n) - int abpoa_add_graph_alignment(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *query, int qlen, int n_cigar, uint64_t *abpoa_cigar, int read_id, int tot_read_n) + void abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_edge, int w, uint8_t add_read_id, int read_id, int read_ids_n) + int abpoa_add_graph_alignment(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *query, int qlen, abpoa_res_t res, int read_id, int tot_read_n) void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt) # generate consensus sequence from graph @@ -129,10 +134,10 @@ cdef extern from "abpoa.h": # Note: cons_seq and cons_l need to be freed by user. int abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt, int seq_n, FILE *out_fp, uint8_t ***cons_seq, int ***cons_cov, int **cons_l, int *cons_n) # generate column multiple sequence alignment from graph - void abpoa_generate_rc_msa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, int seq_n, FILE *out_fp, uint8_t ***msa_seq, int *msa_l) + void abpoa_generate_rc_msa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, uint8_t *is_rc, int seq_n, FILE *out_fp, uint8_t ***msa_seq, int *msa_l) # generate full graph in GFA format - void abpoa_generate_gfa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, int seq_n, FILE *out_fp) + void abpoa_generate_gfa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, uint8_t *is_rc, int seq_n, FILE *out_fp) # generate DOT graph plot int abpoa_dump_pog(abpoa_t *ab, abpoa_para_t *abpt) diff --git a/python/example.py b/python/example.py index 8f32791..c09bde3 100644 --- a/python/example.py +++ b/python/example.py @@ -35,7 +35,7 @@ #@parameters of msa #seqs: multiple sequences out_cons=True # generate consensus sequence, set as False to disable -out_msa=True # generate row-column multiple sequence alignment, set as False to disable +out_msa=True # generate row-column multiple sequence alignment, set as False to disable out_pog="example.png" # generate plot of alignment graph, set None to disable # multiple sequence alignment for 'seqs' diff --git a/python/pyabpoa.pyx b/python/pyabpoa.pyx index e085296..43050c3 100644 --- a/python/pyabpoa.pyx +++ b/python/pyabpoa.pyx @@ -143,9 +143,10 @@ cdef class msa_aligner: for i in range(seq_l): bseq[i] = self.seq_nt4_dict[seq[i]] res.n_cigar = 0 + res.is_rc = 0 abpoa_align_sequence_to_graph(self.ab, &self.abpt, bseq, seq_l, &res) - abpoa_add_graph_alignment(self.ab, &self.abpt, bseq, seq_l, res.n_cigar, res.graph_cigar, read_i, seq_n) + abpoa_add_graph_alignment(self.ab, &self.abpt, bseq, seq_l, res, read_i, seq_n) free(bseq) if res.n_cigar: free(res.graph_cigar) @@ -164,7 +165,7 @@ cdef class msa_aligner: free(cons_seq) free(cons_len) if self.abpt.out_msa: - abpoa_generate_rc_msa(self.ab, &self.abpt, NULL, seq_n, NULL, &msa_seq, &msa_l) + abpoa_generate_rc_msa(self.ab, &self.abpt, NULL, NULL, seq_n, NULL, &msa_seq, &msa_l) for i in range(seq_n): msa_seq1 = '' for c in msa_seq[i][:msa_l]: diff --git a/src/abpoa_graph.c b/src/abpoa_graph.c index c465f40..c8a890e 100644 --- a/src/abpoa_graph.c +++ b/src/abpoa_graph.c @@ -452,7 +452,7 @@ int abpoa_store_consensus(abpoa_graph_t *abg, int src_id, int sink_id, uint8_t * (*cons_seq)[0] = (uint8_t*)_err_malloc(sizeof(uint8_t) * abg->node_n); int id = abg->node[src_id].max_out_id, i = 0; while (id != sink_id) { - (*cons_seq)[0][i++] = "ACGTN"[abg->node[id].base]; + (*cons_seq)[0][i++] = abg->node[id].base; id = abg->node[id].max_out_id; } (*cons_l)[0] = i;