Skip to content

Commit

Permalink
v1.0.4
Browse files Browse the repository at this point in the history
  • Loading branch information
Yan Gao committed Oct 6, 2020
1 parent eb8a601 commit 681cee4
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 20 deletions.
19 changes: 12 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,17 @@
[![Build Status](https://img.shields.io/travis/yangao07/abPOA/master.svg?label=Master)](https://travis-ci.org/yangao07/abPOA)
[![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/master/LICENSE)
<!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
## Updates (v1.0.4)

- Added read ID as header in MSA output: `-A`
- Added GFA output: `-r3`/`-r4`
- Added ambiguous strand mode: `-s`

## Getting started
Download the [latest release](https://github.com/yangao07/abPOA/releases):
```
wget https://github.com/yangao07/abPOA/releases/download/v1.0.3/abPOA-v1.0.3.tar.gz
tar -zxvf abPOA-v1.0.3.tar.gz && cd abPOA-v1.0.3
wget https://github.com/yangao07/abPOA/releases/download/v1.0.4/abPOA-v1.0.4.tar.gz
tar -zxvf abPOA-v1.0.4.tar.gz && cd abPOA-v1.0.4
```
Make from source and run with test data:
```
Expand Down Expand Up @@ -75,9 +80,9 @@ You can also build abPOA from source files.
Make sure you have gcc (>=6.4.0) and zlib installed before compiling.
It is recommended to download the [latest release](https://github.com/yangao07/abPOA/releases).
```
wget https://github.com/yangao07/abPOA/releases/download/v1.0.3/abPOA-v1.0.3.tar.gz
tar -zxvf abPOA-v1.0.3.tar.gz
cd abPOA-v1.0.3; make
wget https://github.com/yangao07/abPOA/releases/download/v1.0.4/abPOA-v1.0.4.tar.gz
tar -zxvf abPOA-v1.0.4.tar.gz
cd abPOA-v1.0.4; make
```
Or, you can use `git clone` command to download the source code.
This gives you the latest version of abPOA, which might still be under development.
Expand All @@ -89,8 +94,8 @@ cd abPOA; make
### <a name="binary"></a>Pre-built binary executable file for Linux/Unix
If you encounter any compilation issues, please try the pre-built binary file:
```
wget https://github.com/yangao07/abPOA/releases/download/v1.0.3/abPOA-v1.0.3_x64-linux.tar.gz
tar -zxvf abPOA-v1.0.3_x64-linux.tar.gz
wget https://github.com/yangao07/abPOA/releases/download/v1.0.4/abPOA-v1.0.4_x64-linux.tar.gz
tar -zxvf abPOA-v1.0.4_x64-linux.tar.gz
```

## <a name="usage"></a>General usage
Expand Down
23 changes: 14 additions & 9 deletions python/cabpoa.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ cdef extern from "abpoa.h":
uint64_t *graph_cigar
int node_s, node_e, query_s, query_e # for local and extension mode
int n_aln_bases, n_matched_bases
uint32_t best_score
uint8_t is_rc


ctypedef struct abpoa_para_t:
Expand All @@ -56,13 +58,16 @@ cdef extern from "abpoa.h":
int simd_flag # available SIMD instruction
# alignment mode
uint8_t ret_cigar, rev_cigar, out_msa, out_msa_header, out_cons, out_gfa, is_diploid, use_read_ids # mode: 0: global, 1: local, 2: extend
uint8_t amb_strand
char *out_pog
int align_mode, gap_mode, cons_agrm
double min_freq # for diploid data
char LogTable65536[65536]
char bit_table16[65536]


ctypedef struct abpoa_node_t:
int node_id, index, rank
int node_id
int in_edge_n, in_edge_m
int *in_id
int out_edge_n, out_edge_m
Expand All @@ -73,19 +78,19 @@ cdef extern from "abpoa.h":
int read_ids_n # for diploid
int aligned_node_n, aligned_node_m
int *aligned_node_id # mismatch; aligned node will have same rank
int heaviest_weight, heaviest_out_id # for consensus
uint8_t base # 0~m

ctypedef struct abpoa_graph_t:
abpoa_node_t *node
int node_n, node_m, index_rank_m
int *index_to_node_id
int *node_id_to_index
int *node_id_to_min_rank
int *node_id_to_max_rank
int *node_id_to_max_pos_left
int *node_id_to_max_pos_right
int *node_id_to_max_remain
int *node_id_to_msa_rank
uint8_t is_topological_sorted, is_called_cons, is_set_msa_rank
double cal_R_time

ctypedef struct abpoa_simd_matrix_t:
pass
Expand All @@ -105,7 +110,7 @@ cdef extern from "abpoa.h":
void abpoa_free(abpoa_t *ab, abpoa_para_t *abpt)

# do msa for a set of input sequences
int abpoa_msa(abpoa_t *ab, abpoa_para_t *abpt, int n_seqs, char **seq_names, int *seq_lens, uint8_t **seqs, FILE *out_fp, uint8_t ***cons_seq, int **cons_l, int *cons_n, uint8_t ***msa_seq, int *msa_l)
int abpoa_msa(abpoa_t *ab, abpoa_para_t *abpt, int n_seqs, char **seq_names, int *seq_lens, uint8_t **seqs, FILE *out_fp, uint8_t ***cons_seq, uint8_t ***cons_cov, int **cons_l, int *cons_n, uint8_t ***msa_seq, int *msa_l)

# clean alignment graph
void abpoa_reset_graph(abpoa_t *ab, abpoa_para_t *abpt, int qlen)
Expand All @@ -115,8 +120,8 @@ cdef extern from "abpoa.h":

# add an alignment to a graph
int abpoa_add_graph_node(abpoa_graph_t *abg, uint8_t base)
void abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_edge, uint8_t add_read_id, int read_id, int read_ids_n)
int abpoa_add_graph_alignment(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *query, int qlen, int n_cigar, uint64_t *abpoa_cigar, int read_id, int tot_read_n)
void abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_edge, int w, uint8_t add_read_id, int read_id, int read_ids_n)
int abpoa_add_graph_alignment(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *query, int qlen, abpoa_res_t res, int read_id, int tot_read_n)
void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt)

# generate consensus sequence from graph
Expand All @@ -129,10 +134,10 @@ cdef extern from "abpoa.h":
# Note: cons_seq and cons_l need to be freed by user.
int abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt, int seq_n, FILE *out_fp, uint8_t ***cons_seq, int ***cons_cov, int **cons_l, int *cons_n)
# generate column multiple sequence alignment from graph
void abpoa_generate_rc_msa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, int seq_n, FILE *out_fp, uint8_t ***msa_seq, int *msa_l)
void abpoa_generate_rc_msa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, uint8_t *is_rc, int seq_n, FILE *out_fp, uint8_t ***msa_seq, int *msa_l)

# generate full graph in GFA format
void abpoa_generate_gfa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, int seq_n, FILE *out_fp)
void abpoa_generate_gfa(abpoa_t *ab, abpoa_para_t *abpt, char **seq_names, uint8_t *is_rc, int seq_n, FILE *out_fp)

# generate DOT graph plot
int abpoa_dump_pog(abpoa_t *ab, abpoa_para_t *abpt)
2 changes: 1 addition & 1 deletion python/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#@parameters of msa
#seqs: multiple sequences
out_cons=True # generate consensus sequence, set as False to disable
out_msa=True # generate row-column multiple sequence alignment, set as False to disable
out_msa=True # generate row-column multiple sequence alignment, set as False to disable
out_pog="example.png" # generate plot of alignment graph, set None to disable

# multiple sequence alignment for 'seqs'
Expand Down
5 changes: 3 additions & 2 deletions python/pyabpoa.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,10 @@ cdef class msa_aligner:
for i in range(seq_l):
bseq[i] = self.seq_nt4_dict[seq[i]]
res.n_cigar = 0
res.is_rc = 0
abpoa_align_sequence_to_graph(self.ab, &self.abpt, bseq, seq_l, &res)

abpoa_add_graph_alignment(self.ab, &self.abpt, bseq, seq_l, res.n_cigar, res.graph_cigar, read_i, seq_n)
abpoa_add_graph_alignment(self.ab, &self.abpt, bseq, seq_l, res, read_i, seq_n)
free(bseq)
if res.n_cigar: free(res.graph_cigar)

Expand All @@ -164,7 +165,7 @@ cdef class msa_aligner:
free(cons_seq)
free(cons_len)
if self.abpt.out_msa:
abpoa_generate_rc_msa(self.ab, &self.abpt, NULL, seq_n, NULL, &msa_seq, &msa_l)
abpoa_generate_rc_msa(self.ab, &self.abpt, NULL, NULL, seq_n, NULL, &msa_seq, &msa_l)
for i in range(seq_n):
msa_seq1 = ''
for c in msa_seq[i][:msa_l]:
Expand Down
2 changes: 1 addition & 1 deletion src/abpoa_graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ int abpoa_store_consensus(abpoa_graph_t *abg, int src_id, int sink_id, uint8_t *
(*cons_seq)[0] = (uint8_t*)_err_malloc(sizeof(uint8_t) * abg->node_n);
int id = abg->node[src_id].max_out_id, i = 0;
while (id != sink_id) {
(*cons_seq)[0][i++] = "ACGTN"[abg->node[id].base];
(*cons_seq)[0][i++] = abg->node[id].base;
id = abg->node[id].max_out_id;
}
(*cons_l)[0] = i;
Expand Down

0 comments on commit 681cee4

Please sign in to comment.