Skip to content

Commit

Permalink
add cons_algrm in pyabpoa
Browse files Browse the repository at this point in the history
  • Loading branch information
yangao07 committed Aug 29, 2024
1 parent cd3f142 commit 8784b06
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 38 deletions.
55 changes: 28 additions & 27 deletions .github/workflows/wheels.yml → .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,35 @@ env:
CIBW_SKIP: pp*

jobs:
build_wheels:
name: Build wheels for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
runs-on: ${{ matrix.buildplat[0] }}
strategy:
# Ensure that a wheel builder finishes even if another fails
fail-fast: false
matrix:
buildplat:
- [ubuntu-latest, manylinux_x86_64, auto]
- [macos-latest, macosx_x86_64, x86_64]
# skip these for now, need more work
- [macos-latest, macosx_arm64, arm64]
# python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
python: ["cp312"]
# build_wheels:
# name: Build wheels for ${{ matrix.python }}-${{ matrix.buildplat[1] }}
# runs-on: ${{ matrix.buildplat[0] }}
# strategy:
# # Ensure that a wheel builder finishes even if another fails
# fail-fast: false
# matrix:
# buildplat:
# - [ubuntu-latest, manylinux_x86_64, auto]
# - [macos-latest, macosx_x86_64, x86_64]
# # skip these for now, need more work
# - [macos-latest, macosx_arm64, arm64]
# # python: ["cp38", "cp39", "cp310", "cp311", "cp312"]
# python: ["cp312"]

steps:
- uses: actions/checkout@v3
with:
submodules: 'true'
# steps:
# - uses: actions/checkout@v3
# with:
# submodules: 'true'

- name: Build wheels
uses: pypa/[email protected]
env:
CIBW_ARCHS: ${{ matrix.buildplat[2] }}
CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}
# - name: Build wheels
# uses: pypa/[email protected]
# env:
# CIBW_ARCHS: ${{ matrix.buildplat[2] }}
# CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}

- uses: actions/upload-artifact@v3
with:
path: ./wheelhouse/*.whl
# - uses: actions/upload-artifact@v3
# with:
# path: ./wheelhouse/*.whl

build_sdist:
name: Build source distribution
Expand All @@ -62,7 +62,8 @@ jobs:
path: dist/*.tar.gz

upload_pypi:
needs: [build_wheels, build_sdist]
# needs: [build_wheels, build_sdist]
needs: [build_sdist]
runs-on: ubuntu-latest
# upload to PyPI on every tag starting with 'v'
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
Expand Down
2 changes: 1 addition & 1 deletion include/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#define ABPOA_OUT_CONS_FQ 5

#define ABPOA_HB 0
#define ABPOA_MC 1
#define ABPOA_MF 1

#define ABPOA_NONE_VERBOSE 0
#define ABPOA_INFO_VERBOSE 1
Expand Down
1 change: 1 addition & 0 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ This constructs a multiple sequence alignment handler of pyabpoa, it accepts the
* **gap_ext2**: second gap extension penalty; default: **1**
* **extra_b**: first adaptive banding parameter; set as < 0 to disable adaptive banded DP; default: **10**
* **extra_f**: second adaptive banding parameter; the number of extra bases added on both sides of the band is *b+f\*L*, where *L* is the length of the aligned sequence; default : **0.01**
* **cons_algrm**: consensus calling algorithm. 'HB': heaviest bundling, 'MF': most frequent bases; default: **'HB'**

The `msa_aligner` handler provides one method which performs multiple sequence alignment and takes four arguments:
```
Expand Down
3 changes: 1 addition & 2 deletions python/cabpoa.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ cdef extern from "abpoa.h":
cdef int ABPOA_OUT_CONS_GFA "ABPOA_OUT_CONS_GFA"

cdef int ABPOA_HB "ABPOA_HB"
cdef int ABPOA_HC "ABPOA_HC"
cdef int ABPOA_MF "ABPOA_MF"

ctypedef struct abpoa_res_t:
Expand Down Expand Up @@ -63,7 +62,7 @@ cdef extern from "abpoa.h":
uint8_t use_qv, disable_seeding, progressive_poa
char *incr_fn
char *out_pog
int align_mode, gap_mode, max_n_cons
int align_mode, gap_mode, max_n_cons, cons_algrm
double min_freq # for diploid data
int verbose

Expand Down
12 changes: 10 additions & 2 deletions python/pyabpoa.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ cdef class msa_aligner:
cdef abpoa_para_t abpt
cdef seq2int_dict, int2seq_dict

def __cinit__(self, aln_mode='g', is_aa=False, match=2, mismatch=4, score_matrix=b'', gap_open1=4, gap_open2=24, gap_ext1=2, gap_ext2=1,
extra_b=10, extra_f=0.01):
def __cinit__(self, aln_mode='g', is_aa=False,
match=2, mismatch=4, score_matrix=b'', gap_open1=4, gap_open2=24, gap_ext1=2, gap_ext2=1,
extra_b=10, extra_f=0.01,
cons_algrm='HB'):
self.ab = abpoa_init()

if aln_mode == 'g':
Expand Down Expand Up @@ -132,6 +134,12 @@ cdef class msa_aligner:
self.abpt.zdrop = -1
self.abpt.disable_seeding = 1
self.abpt.progressive_poa = 0
if cons_algrm.upper() == 'MF':
self.abpt.cons_algrm = ABPOA_MF
elif cons_algrm.upper() == 'HB':
self.abpt.cons_algrm = ABPOA_HB
else:
raise Exception('Unknown conseneus calling mode: {}'.format(cons_algrm))

self.seq2int_dict, self.int2seq_dict = set_seq_int_dict(self.abpt.m)

Expand Down
2 changes: 1 addition & 1 deletion src/abpoa.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ int abpoa_usage(void)
err_printf(" - %d: consensus in FASTQ format\n", ABPOA_OUT_CONS_FQ);
err_printf(" -a --cons-algrm INT consensus algorithm [%d]\n", ABPOA_HB);
err_printf(" - %d: heaviest bundling path in partial order graph\n", ABPOA_HB);
err_printf(" - %d: most frequent bases at each position\n", ABPOA_MC);
err_printf(" - %d: most frequent bases at each position\n", ABPOA_MF);
err_printf(" -d --maxnum-cons INT max. number of consensus sequence to generate [1]\n");
err_printf(" -q --min-freq FLOAT min. frequency of each consensus sequence (only effective when -d/--num-cons > 1) [%.2f]\n", MULTIP_MIN_FREQ);
err_printf(" -g --out-pog FILE dump final alignment graph to FILE (.pdf/.png) [Null]\n\n");
Expand Down
2 changes: 1 addition & 1 deletion src/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#define ABPOA_OUT_CONS_FQ 5

#define ABPOA_HB 0
#define ABPOA_MC 1
#define ABPOA_MF 1

#define ABPOA_NONE_VERBOSE 0
#define ABPOA_INFO_VERBOSE 1
Expand Down
4 changes: 2 additions & 2 deletions src/abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,10 @@ abpoa_para_t *abpoa_init_para(void) {

void abpoa_post_set_para(abpoa_para_t *abpt) {
abpoa_set_gap_mode(abpt);
if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) {
if (abpt->out_msa || abpt->out_gfa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF) {
abpt->use_read_ids = 1;
if (abpt->out_msa || abpt->out_gfa) set_65536_table();
if (abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC) set_bit_table16();
if (abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF) set_bit_table16();
}
if (abpt->align_mode == ABPOA_LOCAL_MODE) abpt->wb = -1;
int i;
Expand Down
4 changes: 2 additions & 2 deletions src/abpoa_graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt) {
// fprintf(stderr, "node_n: %d, index_rank_m: %d\n", node_n, abg->index_rank_m);
abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, abg->index_rank_m * sizeof(int));
abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, abg->index_rank_m * sizeof(int));
if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC)
if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF)
abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, abg->index_rank_m * sizeof(int));
if (abpt->wb >= 0) {
abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, abg->index_rank_m * sizeof(int));
Expand Down Expand Up @@ -699,7 +699,7 @@ void abpoa_reset(abpoa_t *ab, abpoa_para_t *abpt, int qlen) {
abg->node_m = abg->index_rank_m = node_m;
abg->index_to_node_id = (int*)_err_realloc(abg->index_to_node_id, node_m * sizeof(int));
abg->node_id_to_index = (int*)_err_realloc(abg->node_id_to_index, node_m * sizeof(int));
if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MC)
if (abpt->out_msa || abpt->max_n_cons > 1 || abpt->cons_algrm == ABPOA_MF)
abg->node_id_to_msa_rank = (int*)_err_realloc(abg->node_id_to_msa_rank, node_m * sizeof(int));
if (abpt->wb >= 0) {
abg->node_id_to_max_pos_left = (int*)_err_realloc(abg->node_id_to_max_pos_left, node_m * sizeof(int));
Expand Down

0 comments on commit 8784b06

Please sign in to comment.