support multiple consensus sequences

yangao07 · Mar 15, 2022 · bfe4ac0 · bfe4ac0
1 parent fb0a6a1
commit bfe4ac0
Show file tree

Hide file tree

Showing 22 changed files with 1,474 additions and 1,055 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,6 +19,7 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
 add_library(abpoa
     src/abpoa_align.c
     src/abpoa_graph.c
+    src/abpoa_output.c
     src/abpoa_plot.c
     src/abpoa_seed.c
     src/abpoa_seq.c

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,6 +1,7 @@
 include src/abpoa_align.h
 include src/abpoa_graph.h
 include src/abpoa.h
+include src/abpoa_output.h
 include src/abpoa_seed.h
 include src/abpoa_seq.h
 include src/kalloc.h

diff --git a/Makefile b/Makefile
@@ -45,9 +45,9 @@ LIB_DIR = $(OUT_PRE_DIR)/lib
 INC_DIR = ./include
 SRC_DIR = ./src
 
-SOURCE = $(SRC_DIR)/abpoa_align.c $(SRC_DIR)/abpoa.c $(SRC_DIR)/abpoa_graph.c $(SRC_DIR)/abpoa_plot.c $(SRC_DIR)/abpoa_seed.c $(SRC_DIR)/abpoa_seq.c $(SRC_DIR)/kalloc.c $(SRC_DIR)/kstring.c  $(SRC_DIR)/simd_abpoa_align.c $(SRC_DIR)/simd_check.c $(SRC_DIR)/utils.c
-HEADER = $(SRC_DIR)/abpoa_align.h $(SRC_DIR)/abpoa_graph.h $(SRC_DIR)/abpoa.h $(SRC_DIR)/abpoa_seed.h $(SRC_DIR)/abpoa_seq.h $(SRC_DIR)/kalloc.h $(SRC_DIR)/kdq.h $(SRC_DIR)/khash.h $(SRC_DIR)/kseq.h $(SRC_DIR)/ksort.h $(SRC_DIR)/kstring.h $(SRC_DIR)/kvec.h $(SRC_DIR)/simd_instruction.h $(SRC_DIR)/simd_abpoa_align.h $(SRC_DIR)/utils.h
-OBJS   = $(SRC_DIR)/abpoa_align.o $(SRC_DIR)/abpoa_graph.o $(SRC_DIR)/abpoa_plot.o $(SRC_DIR)/abpoa_seed.o $(SRC_DIR)/abpoa_seq.o $(SRC_DIR)/kalloc.o $(SRC_DIR)/kstring.o $(SRC_DIR)/simd_abpoa_align.o $(SRC_DIR)/simd_check.o $(SRC_DIR)/utils.o
+SOURCE = $(SRC_DIR)/abpoa_align.c $(SRC_DIR)/abpoa.c $(SRC_DIR)/abpoa_graph.c $(SRC_DIR)/abpoa_plot.c $(SRC_DIR)/abpoa_seed.c $(SRC_DIR)/abpoa_seq.c $(SRC_DIR)/abpoa_output.c $(SRC_DIR)/kalloc.c $(SRC_DIR)/kstring.c  $(SRC_DIR)/simd_abpoa_align.c $(SRC_DIR)/simd_check.c $(SRC_DIR)/utils.c
+HEADER = $(SRC_DIR)/abpoa_align.h $(SRC_DIR)/abpoa_graph.h $(SRC_DIR)/abpoa.h $(INC_DIR)/abpoa.h $(SRC_DIR)/abpoa_seed.h $(SRC_DIR)/abpoa_seq.h $(SRC_DIR)/abpoa_output.h $(SRC_DIR)/kalloc.h $(SRC_DIR)/kdq.h $(SRC_DIR)/khash.h $(SRC_DIR)/kseq.h $(SRC_DIR)/ksort.h $(SRC_DIR)/kstring.h $(SRC_DIR)/kvec.h $(SRC_DIR)/simd_instruction.h $(INC_DIR)/simd_instruction.h $(SRC_DIR)/simd_abpoa_align.h $(SRC_DIR)/utils.h
+OBJS   = $(SRC_DIR)/abpoa_align.o $(SRC_DIR)/abpoa_graph.o $(SRC_DIR)/abpoa_plot.o $(SRC_DIR)/abpoa_seed.o $(SRC_DIR)/abpoa_seq.o $(SRC_DIR)/abpoa_output.o $(SRC_DIR)/kalloc.o $(SRC_DIR)/kstring.o $(SRC_DIR)/simd_abpoa_align.o $(SRC_DIR)/simd_check.o $(SRC_DIR)/utils.o
 
 # SIMD label
 SIMD_CHECK_D = -D __CHECK_SIMD_MAIN__

diff --git a/README.md b/README.md
@@ -8,20 +8,16 @@
 [![Build Status](https://img.shields.io/travis/yangao07/abPOA/master.svg?label=Master)](https://travis-ci.org/yangao07/abPOA)
 [![License](https://img.shields.io/badge/License-MIT-black.svg)](https://github.com/yangao07/abPOA/blob/master/LICENSE)
 <!-- [![PyPI](https://img.shields.io/pypi/v/pyabpoa.svg?style=flat)](https://pypi.python.org/pypi/pyabpoa) -->
-## Updates (v1.3.0)
+## Updates (v1.4.0)
 
-- Use SIMDE to handle SIMD instructions
-- Set seeding as disabled by default, -S/--seeding to enable seeding, remove -N/--no-seeding
-- Set min. size of POA windows as 500 in seeding mode
-- Add -c/--amio-acid to align amino acid input sequences
-- Add `is_aa` parameter in pyabpoa to align amino acid input sequences
-- Fix CMakeLists.txt
+- Allow to generate multiple consensus sequences from graph alignment (-d)
+- Output sequence name by default in MSA format, remove -A/--msa-header
 
 ## Getting started
 Download the [latest release](https://github.com/yangao07/abPOA/releases):
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.3.0/abPOA-v1.3.0.tar.gz
-tar -zxvf abPOA-v1.3.0.tar.gz && cd abPOA-v1.3.0
+wget https://github.com/yangao07/abPOA/releases/download/v1.4.0/abPOA-v1.4.0.tar.gz
+tar -zxvf abPOA-v1.4.0.tar.gz && cd abPOA-v1.4.0
 ```
 Make from source and run with test data:
 ```
@@ -40,18 +36,23 @@ abpoa ./test_data/seq.fa > cons.fa
   - [Building abPOA from source files](#build)
   - [Pre-built binary executable file for Linux/Unix](#binary)
 - [General usage](#usage)
-  - [To generate consensus sequence](#gen_cons)
+  - [To generate one consensus sequence](#gen_1cons)
+  - [To generate multiple consensus sequences](#gen_mcons)
   - [To generate row-column multiple sequence alignment](#gen_msa)
   - [To generate graph information in GFA format](#gen_gfa)
   - [To align sequence to an existing graph in GFA/MSA format](#aln_to_gfa)
   - [To generate a plot of the alignment graph](#gen_plot)
-- [Commands and options](#cmd)
 - [Input](#input)
 - [Output](#output)
   - [Consensus sequence](#cons)
   - [Row-column multiple sequence alignment](#msa)
   - [Full graph information](#gfa)
   - [Plot of alignment graph](#plot)
+- [Algorithm description](#description)
+  - [Adaptive banding](#banding)
+  - [Minimizer-based seeding and partition](#seeding)
+  - [Minimizer-based progressive tree](#tree)
+  - [Multiple conensus sequences](#mcons)
 - [For development](#dev)
 - [Evaluation datasets](#eval)
 - [Contact](#contact)
@@ -84,9 +85,9 @@ You can also build abPOA from source files.
 Make sure you have gcc (>=6.4.0) and zlib installed before compiling.
 It is recommended to download the [latest release](https://github.com/yangao07/abPOA/releases).
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.3.0/abPOA-v1.3.0.tar.gz
-tar -zxvf abPOA-v1.3.0.tar.gz
-cd abPOA-v1.3.0; make
+wget https://github.com/yangao07/abPOA/releases/download/v1.4.0/abPOA-v1.4.0.tar.gz
+tar -zxvf abPOA-v1.4.0.tar.gz
+cd abPOA-v1.4.0; make
 ```
 Or, you can use `git clone` command to download the source code.
 This gives you the latest version of abPOA, which might be still under development.
@@ -98,17 +99,23 @@ cd abPOA; make
 ### <a name="binary"></a>Pre-built binary executable file for Linux/Unix 
 If you meet any compiling issue, please try the pre-built binary file:
 ```
-wget https://github.com/yangao07/abPOA/releases/download/v1.3.0/abPOA-v1.3.0_x64-linux.tar.gz
-tar -zxvf abPOA-v1.3.0_x64-linux.tar.gz
+wget https://github.com/yangao07/abPOA/releases/download/v1.4.0/abPOA-v1.4.0_x64-linux.tar.gz
+tar -zxvf abPOA-v1.4.0_x64-linux.tar.gz
 ```
 
 ## <a name="usage"></a>General usage
-### <a name="gen_cons"></a>To generate consensus sequence
+### <a name="gen_1cons"></a>To generate consensus sequence
 
 ```
 abpoa seq.fa > cons.fa
 ```
 
+### <a name="gen_mcons"></a>To generate multiple consensus sequences
+
+```
+abpoa heter.fa -d2 > 2cons.fa
+```
+
 ### <a name="gen_msa"></a>To generate row-column multiple sequence alignment in PIR format
 
 ```
@@ -128,14 +135,14 @@ abpoa seq.fa -r4 > out.gfa
 ### <a name="aln_to_gfa"></a>To align sequence to an existing graph in GFA/MSA format
 ```
 abpoa -i in.gfa seq.fa -r3 > out.gfa
-abpoa -i in.msa seq.fa -Ar1 > out.msa
+abpoa -i in.msa seq.fa -r1 > out.msa
 ```
 For GFA input file, `S` and `P` lines are required and are used to reconstruct the alignment graph.
 For MSA input file, which is generally a FASTA format file, `-` in the sequence indicates the alignment gap.
 If you want to use abPOA to generate a MSA output file and then perform the incremental graph alignment, please do not forget `-A` to include the FASTA header of each sequence:
 
 ```
-abpoa seq1.fa -Ar1 > seq1.msa
+abpoa seq1.fa -r1 > seq1.msa
 abpoa -i seq1.msa seq2.fa > cons.fa
 ```
 
@@ -146,61 +153,6 @@ abpoa seq.fa -g poa.png > cons.fa
 ```
 See [Plot of alignment graph](#plot) for more details about the plot file.
 
-## <a name="cmd"></a>Commands and options
-```
-Usage: abpoa [options] <in.fa/fq> > cons.fa/msa.out/abpoa.gfa
-
-Options:
-  Alignment:
-    -m --aln-mode INT       alignment mode [0]
-                              0: global, 1: local, 2: extension
-    -M --match    INT       match score [2]
-    -X --mismatch INT       mismatch penalty [4]
-    -t --matrix   FILE      scoring matrix file, '-M' and '-X' are not used when '-t' is used [Null]
-                            e.g., 'HOXD70.mtx, BLOSUM62.mtx'
-    -O --gap-open INT(,INT) gap opening penalty (O1,O2) [4,24]
-    -E --gap-ext  INT(,INT) gap extension penalty (E1,E2) [2,1]
-                            abPOA provides three gap penalty modes, cost of a g-long gap:
-                            - convex (default): min{O1+g*E1, O2+g*E2}
-                            - affine (set O2 as 0): O1+g*E1
-                            - linear (set O1 as 0): g*E1
-    -s --amb-strand         ambiguous strand mode [False]
-                            for each input sequence, try the reverse complement if the current
-                            alignment score is too low, and pick the strand with a higher score
-  Adaptive banded DP:
-    -b --extra-b  INT       first adaptive banding parameter [10]
-                            set b as < 0 to disable adaptive banded DP
-    -f --extra-f  FLOAT     second adaptive banding parameter [0.01]
-                            the number of extra bases added on both sites of the band is
-                            b+f*L, where L is the length of the aligned sequence
-  Minimizer-based seeding and partition (only effective in global alignment mode):
-    -S --seeding            enable seeding [False]
-    -k --k-mer       INT    minimizer k-mer size [19]
-    -w --window      INT    minimizer window size [10]
-    -n --min-poa-win INT    min. size of window to perform POA [500]
-    -p --progressive        build guide tree and perform progressive partial order alignment [False]
-  Input/Output:
-    -c --amino-acid         input sequences are amino acid (default is nucleotide) [False]
-    -l --in-list            input file is a list of sequence file names [False]
-                            each line is one sequence file containing a set of sequences
-                            which will be aligned by abPOA to generate a consensus sequence
-    -i --incrmnt  FILE      incrementally align sequences to an existing graph/MSA [Null]
-                            graph could be in GFA or MSA format generated by abPOA
-    -o --output   FILE      ouput to FILE [stdout]
-    -r --result   INT       output result mode [0]
-                            - 0: consensus in FASTA format
-                            - 1: MSA in PIR format
-                            - 2: both 0 & 1
-                            - 3: graph in GFA format
-                            - 4: graph with consensus path in GFA format
-                            - 5: consensus in FASTQ format
-    -A --msa-header         add read ID as header of each sequence in MSA output [False]
-    -g --out-pog  FILE      dump final alignment graph to FILE (.pdf/.png) [Null]
-
-    -h --help               print this help usage information
-    -v --version            show version number
-```
-
 ## <a name="input"></a>Input
 abPOA works with FASTA, FASTQ, gzip'd FASTA(.fa.gz) and gzip'd FASTQ(.fq.gz) formats. The input file is 
 expected to contains multiple sequences which will be processed sequentially to perform the iterative 
@@ -254,6 +206,42 @@ The numbers inside the nodes are the node IDs. The numbers on the edges are the
 Make sure you have `dot` installed beforing using abPOA to generate the plot.
 For Linux/Unix systems: `sudo apt-get install graphviz`.
 
+## <a name="description"></a>Algorithm description
+### <a name="banding"></a>Adaptive banding
+To understand how the adaptive banding working, please refer to our [Bioinformatics paper](https://dx.doi.org/10.1093/bioinformatics/btaa963).
+
+### <a name="seeding"></a>Minimizer-based seeding mode
+As abPOA always allocates quadratic size of memory, for very long input sequences (>10 kb), memory usage will be a challenge.
+
+To solve this issue, we develop a minimizer-based seeding and partition method to split the sequence and graph with a small window.
+The full POA DP matrix can be split into several smaller ones and adaptive banded POA can be performed within each small window separately.
+
+In more detail, abPOA extracts all the minimizers from all the input sequences, then all the minimizer hits between each pair of two sequences can be found.
+For each pair of seqences, the minimizer hits are first chained together using relatively stringent criteria to make sure that no big gap exists in the chain.
+This usually leads to several separated local chains of minimizer hits.
+A second round of chaining is then performed on all the local minimizer chains to generate a global chain going through the entire sequence.
+With this global chain, abPOA selects a series of minimizer hits as partition anchors which has at least a distance of 500 bp (by default, -n/--min-poa-win).
+Within each partitioned window, abPOA performs banded partial order alignment separately and combines all the alignment results at the end.
+
+### <a name="tree"></a>Minimizer-based progressive tree
+Instead of aligning all the sequences in the original order, abPOA can alternatively build a progressive tree to guide the alignment order.
+The generation of the progressive tree is also based on minimizers.
+For each pair of sequences, abPOA calculates their similarity score which is the Jaccard similarity of the minimizers, i.e. the number of minimizer hits divided by the total number of all minimizers from the two sequences.
+With all the similarity scores (minimizer-based Jaccard similarity), abPOA builds the progressive tree in the following way:
+
+1. Pick the first two sequences that have the highest scores. The progressive tree set is initialized as these first two sequences.
+2. For each remaining sequence, sum the scores between the remaining sequence and all the sequences from the current progressive tree set. Pick the one with the highest sum score, and push it to the progressive tree set.
+3. Repeat step 2, until no sequence remains.
+
+Then, abPOA performs partial order alignment following the order of sequences in this progressive tree set.
+
+### <a name="mcons"></a>Multiple consensus sequences
+Since v1.4.0, abPOA supports generation of multiple consensus sequences (set -d/--max-num-cons as >1).
+
+The general underlying idea is to group input sequences into multiple clusters based on the heterozygous bases in the graph,
+Then, one consensus sequence is separately generated for each cluster of input sequences.
+The minimum allele frequency for each heterozygous base is 0.25 (by default, -q/--min-freq). 
+
 ## <a name="dev"></a>For development
 abPOA is not only a stand-alone tool for MSA and consensus calling, it can also work as a programming library. [example.c](example.c) shows how to use the C APIs of abPOA to take a set of sequences as input and perform MSA and consensus calling. Basically, the library file `libabpoa.a` and two header files [abpoa.h](include/abpoa.h) and [simd_instruction.h](include/simd_instruction.h) are needed to make the abPOA library work in your program.
 

diff --git a/example.c b/example.c
@@ -1,7 +1,7 @@
 /* example.c libabpoa usage example
    To compile: 
 gcc -g example.c -I ./include -L ./lib -labpoa -lz -lm -o example
-   or:
+or:
 gcc -g example.c -I ./include ./lib/libabpoa.a -lz -lm -o example
 */
 #include <stdio.h>
@@ -53,17 +53,30 @@ const char nt256_table[256] = {
 
 int main(void) {
     int i, j, n_seqs = 10;
+    // char seqs[10][100] = {
+    //     "CGTCAATCTATCGAAGCATACGCGGGCAGAGCCGAAGACCTCGGCAATCCA",
+    //     "CCACGTCAATCTATCGAAGCATACGCGGCAGCCGAACTCGACCTCGGCAATCAC",
+    //     "CGTCAATCTATCGAAGCATACGCGGCAGAGCCCGGAAGACCTCGGCAATCAC",
+    //     "CGTCAATGCTAGTCGAAGCAGCTGCGGCAGAGCCGAAGACCTCGGCAATCAC",
+    //     "CGTCAATCTATCGAAGCATTCTACGCGGCAGAGCCGACCTCGGCAATCAC",
+    //     "CGTCAATCTAGAAGCATACGCGGCAAGAGCCGAAGACCTCGGCCAATCAC",
+    //     "CGTCAATCTATCGGTAAAGCATACGCTCTGTAGCCGAAGACCTCGGCAATCAC",
+    //     "CGTCAATCTATCTTCAAGCATACGCGGCAGAGCCGAAGACCTCGGCAATC",
+    //     "CGTCAATGGATCGAGTACGCGGCAGAGCCGAAGACCTCGGCAATCAC",
+    //     "CGTCAATCTAATCGAAGCATACGCGGCAGAGCCGTCTACCTCGGCAATCACGT"
+    //     };
+
     char seqs[10][100] = {
-        "CGTCAATCTATCGAAGCATACGCGGGCAGAGCCGAAGACCTCGGCAATCCA",
-        "CCACGTCAATCTATCGAAGCATACGCGGCAGCCGAACTCGACCTCGGCAATCAC",
-        "CGTCAATCTATCGAAGCATACGCGGCAGAGCCCGGAAGACCTCGGCAATCAC",
-        "CGTCAATGCTAGTCGAAGCAGCTGCGGCAGAGCCGAAGACCTCGGCAATCAC",
-        "CGTCAATCTATCGAAGCATTCTACGCGGCAGAGCCGACCTCGGCAATCAC",
-        "CGTCAATCTAGAAGCATACGCGGCAAGAGCCGAAGACCTCGGCCAATCAC",
-        "CGTCAATCTATCGGTAAAGCATACGCTCTGTAGCCGAAGACCTCGGCAATCAC",
-        "CGTCAATCTATCTTCAAGCATACGCGGCAGAGCCGAAGACCTCGGCAATC",
-        "CGTCAATGGATCGAGTACGCGGCAGAGCCGAAGACCTCGGCAATCAC",
-        "CGTCAATCTAATCGAAGCATACGCGGCAGAGCCGTCTACCTCGGCAATCACGT"
+        "CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT",
+        "CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT"
         };
 
     // initialize variables
@@ -89,6 +102,7 @@ int main(void) {
     abpt->out_cons = 1; // generate consensus sequence, set 0 to disable
     abpt->w = 6, abpt->k = 9; abpt->min_w = 10; // minimizer-based seeding and partition
     abpt->progressive_poa = 1;
+    abpt->max_n_cons = 2; // to generate 2 consensus sequences
 
     abpoa_post_set_para(abpt);
 
@@ -102,49 +116,45 @@ int main(void) {
             bseqs[i][j] = nt4_table[(int)seqs[i][j]];
     }
 
-    // 1. output to stdout
+    // 1. directly output to stdout
     fprintf(stdout, "=== output to stdout ===\n");
-
     // perform abpoa-msa
-    abpoa_msa(ab, abpt, n_seqs, NULL, seq_lens, bseqs, stdout, NULL, NULL, NULL, NULL, NULL, NULL);
+    abpoa_msa(ab, abpt, n_seqs, NULL, seq_lens, bseqs, stdout);
 
-    // 2. variables to store result
-    uint8_t **cons_seq; int **cons_cov, *cons_l, cons_n=0;
-    uint8_t **msa_seq; int msa_l=0;
-
-    // perform abpoa-msa
-    ab->abs->n_seq = 0; // To re-use ab, n_seq needs to be set as 0
-    abpoa_msa(ab, abpt, n_seqs, NULL, seq_lens, bseqs, NULL, &cons_seq, &cons_cov, &cons_l, &cons_n, &msa_seq, &msa_l);
-
-    fprintf(stdout, "=== output to variables ===\n");
-    for (i = 0; i < cons_n; ++i) {
-        fprintf(stdout, ">Consensus_sequence\n");
-        for (j = 0; j < cons_l[i]; ++j)
-            fprintf(stdout, "%c", nt256_table[cons_seq[i][j]]);
-        fprintf(stdout, "\n");
-    }
+    // 2. output MSA alignment and  consensus sequence stored in (abpoa_cons_t *)
+    abpoa_cons_t *abc = ab->abc;
+    fprintf(stdout, "=== stored in variables ===\n");
     fprintf(stdout, ">Multiple_sequence_alignment\n");
-    for (i = 0; i < n_seqs; ++i) {
-        for (j = 0; j < msa_l; ++j) {
-            fprintf(stdout, "%c", nt256_table[msa_seq[i][j]]);
+    for (i = 0; i < abc->n_seq; ++i) {
+        for (j = 0; j < abc->msa_len; ++j) {
+            fprintf(stdout, "%c", nt256_table[abc->msa_base[i][j]]);
         }
         fprintf(stdout, "\n");
     }
 
-    if (cons_n) {
-        for (i = 0; i < cons_n; ++i) {
-            free(cons_seq[i]); free(cons_cov[i]);
-        } free(cons_seq); free(cons_cov); free(cons_l);
-    }
-    if (msa_l) {
-        for (i = 0; i < n_seqs; ++i) free(msa_seq[i]); free(msa_seq);
+    for (i = 0; i < abc->n_cons; ++i) {
+        fprintf(stdout, ">Consensus_sequence");
+        if (abc->n_cons > 1) {
+            fprintf(stdout, "_%d ", i+1);
+            for (j = 0; j < abc->clu_n_seq[i]; ++j) { // output read ids for each cluster/group
+                fprintf(stdout, "%d", abc->clu_read_ids[i][j]);
+                if (j != abc->clu_n_seq[i]-1) fprintf(stdout, ",");
+            }
+        }
+        fprintf(stdout, "\n");
+        for (j = 0; j < abc->cons_len[i]; ++j)
+            fprintf(stdout, "%c", nt256_table[abc->cons_base[i][j]]);
+        fprintf(stdout, "\n");
     }
 
     /* generate DOT partial order graph plot */
     abpt->out_pog = strdup("example.png"); // dump parital order graph to file
     if (abpt->out_pog != NULL) abpoa_dump_pog(ab, abpt);
 
+    // free seq-related variables
     for (i = 0; i < n_seqs; ++i) free(bseqs[i]); free(bseqs); free(seq_lens);
+
+    // free abpoa-related variables
     abpoa_free(ab); abpoa_free_para(abpt); 
     return 0;
 }