diff --git a/.gitignore b/.gitignore index 9e165295f4..cf49089ee5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.fai !metagraph/tests/data/*.fa !metagraph/tests/data/*.fai +!metagraph/tests/data/taxonomic_data/*.fa metagraph/tests/data/*dump_test* projects/*/temp visualization/geolocation/data/* diff --git a/metagraph/integration_tests/test_taxonomy.py b/metagraph/integration_tests/test_taxonomy.py new file mode 100644 index 0000000000..474f6e09ef --- /dev/null +++ b/metagraph/integration_tests/test_taxonomy.py @@ -0,0 +1,195 @@ +import unittest +import subprocess +from subprocess import PIPE +from parameterized import parameterized + +from tempfile import TemporaryDirectory +import os + + +"""Test taxonomy classification framework""" + +METAGRAPH = './metagraph' +PROTEIN_MODE = os.readlink(METAGRAPH).endswith("_Protein") # TODO - decide if we need to consider this "_Protein" case +TAX_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + '/../tests/data/taxonomic_data' + +tax_tests = { + 'one_thread': { + 'threads': 1, + }, + 'nine_threads': { + 'threads': 9, + } +} + +test_params = [name for name, _ in tax_tests.items()] + +class TestTaxonomy(unittest.TestCase): + def setUp(self): + self.tempdir = TemporaryDirectory() + self.tax_parent = {} + self.tax_root = -1 + self.lca_coverage = 0.9 + self.k = 20 + tax_lines = open(TAX_DATA_DIR + '/dumb_nodes.dmp').readlines() + for line in tax_lines: + act_node = line.split('\t')[0].strip() + act_parent = line.split('\t')[2].strip() + self.tax_parent[act_node] = act_parent + if act_node == act_parent: + self.tax_root = act_node + + def is_descendant(self, target: str, curr: str) -> bool: + if curr == target: + return True + while curr != self.tax_root: + curr = self.tax_parent[curr] + if curr == target: + return True + return False + + def build_graph_and_anno_matrix(self, num_threads: int): + construct_command = '{exe} build -p {num_threads} -k {k} -o {outfile} {input}'.format( + exe=METAGRAPH, + 
num_threads=num_threads, + k=self.k, + outfile=self.tempdir.name + '/graph', + input=TAX_DATA_DIR + '/tax_input.fa' + ) + res = subprocess.run([construct_command], shell=True) + self.assertEqual(res.returncode, 0) + + annotate_command = '{exe} annotate --anno-header -i {dbg} -o {anno} -p {num_threads} {input_fasta}'.format( + exe=METAGRAPH, + dbg=self.tempdir.name + '/graph.dbg', + anno=self.tempdir.name + '/annotation', + num_threads=num_threads, + input_fasta=TAX_DATA_DIR + '/tax_input.fa' + ) + res = subprocess.run([annotate_command], shell=True) + self.assertEqual(res.returncode, 0) + + def get_prediction_statistics_from_stdout(self, stdout_lines: [str]) -> {}: + result = {"num_tip_hit": 0, + "num_internal_hit": 0, + "total_num_tips": 0, + "total_num_internals": 0, + "num_descendant_hit": 0, + "num_ancestor_hit_for_tips": 0, + "num_ancestor_hit_for_internals": 0, + "num_tip_misses": 0, + "num_internal_misses": 0, + "num_failed_classification": 0} + + for line in stdout_lines: + if line == "": + continue + query_expected = line.split(" ")[1].split("|")[1].strip() + query_prediction = line.split(" ")[7].split("'")[1].strip() + + # TaxId 0 is a wildcard for not enough discovered kmers to produce a confident classification. + if query_prediction == "0": + result["num_failed_classification"] += 1 + continue + + # All the tax nodes with ids {10001, 10002 .. 10008} represents internal nodes, while + # taxIds >= 10009 are reserved for the leaves. + if int(line.split(" ")[1].split("|")[1]) >= 10009: + # The current taxid is a tip, thus, it has no children in the taxonomic tree. + result["total_num_tips"] += 1 + if query_expected == query_prediction: + result["num_tip_hit"] += 1 + else: + if self.is_descendant(target=query_prediction, curr=query_expected): + result["num_ancestor_hit_for_tips"] += 1 + else: + result["num_tip_misses"] += 1 + else: + # The current taxid is an internal node. 
+ result["total_num_internals"] += 1 + if query_expected == query_prediction: + result["num_internal_hit"] += 1 + else: + if self.is_descendant(target=query_prediction, curr=query_expected): + result["num_ancestor_hit_for_internals"] += 1 + elif self.is_descendant(target=query_expected, curr=query_prediction): + result["num_descendant_hit"] += 1 + else: + result["num_internal_misses"] += 1 + return result + + @parameterized.expand(test_params) + @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets") + def test_taxonomy_getrows(self, tax_test): + self.build_graph_and_anno_matrix(tax_tests[tax_test]['threads']) + tax_class_command = '{exe} tax_class -i {dbg} {fasta_queries} --taxonomic-tree {tax_tree} \ + --min-lca-coverage {lca_coverage} --label-taxid-map {label_taxid_map} ' \ + '-p {num_threads} -a {anno}'.format( + exe=METAGRAPH, + dbg=self.tempdir.name + '/graph.dbg', + fasta_queries=TAX_DATA_DIR + '/tax_query.fa', + tax_tree=TAX_DATA_DIR + '/dumb_nodes.dmp', + lca_coverage=self.lca_coverage, + label_taxid_map=TAX_DATA_DIR + '/dumb.accession2taxid', + num_threads=tax_tests[tax_test]['threads'], + anno=self.tempdir.name + '/annotation.column.annodbg', + ) + res = subprocess.run([tax_class_command], shell=True, stdout=PIPE) + self.assertEqual(res.returncode, 0) + + res_lines = res.stdout.decode().rstrip().split('\n') + statistics = self.get_prediction_statistics_from_stdout(res_lines) + + self.assertEqual(statistics["total_num_tips"], 118) + self.assertEqual(statistics["total_num_internals"], 80) + + self.assertEqual(statistics["num_tip_hit"], 109) + self.assertEqual(statistics["num_internal_hit"], 38) + + self.assertEqual(statistics["num_ancestor_hit_for_internals"], 5) + self.assertEqual(statistics["num_descendant_hit"], 34) + self.assertEqual(statistics["num_ancestor_hit_for_tips"], 9) + + self.assertEqual(statistics["num_internal_misses"], 3) + self.assertEqual(statistics["num_tip_misses"], 0) + + 
self.assertEqual(statistics["num_failed_classification"], 2) + + @parameterized.expand(test_params) + @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets") + def test_taxonomy_toplabels(self, tax_test): + self.build_graph_and_anno_matrix(tax_tests[tax_test]['threads']) + tax_class_command = '{exe} tax_class -i {dbg} {fasta_queries} --taxonomic-tree {tax_tree} \ + --min-lca-coverage {lca_coverage} -p {num_threads} -a {anno} \ + --label-taxid-map {label_taxid_map} \ + --top-label-fraction {top_label_fraction}'.format( + exe=METAGRAPH, + dbg=self.tempdir.name + '/graph.dbg', + fasta_queries=TAX_DATA_DIR + '/tax_query.fa', + tax_tree=TAX_DATA_DIR + '/dumb_nodes.dmp', + lca_coverage=self.lca_coverage, + label_taxid_map=TAX_DATA_DIR + '/dumb.accession2taxid', + num_threads=tax_tests[tax_test]['threads'], + anno=self.tempdir.name + '/annotation.column.annodbg', + top_label_fraction=0.7, + ) + res = subprocess.run([tax_class_command], shell=True, stdout=PIPE) + self.assertEqual(res.returncode, 0) + + res_lines = res.stdout.decode().rstrip().split('\n') + statistics = self.get_prediction_statistics_from_stdout(res_lines) + + self.assertEqual(statistics["total_num_tips"], 118) + self.assertEqual(statistics["total_num_internals"], 68) + + self.assertEqual(statistics["num_tip_hit"], 74) + self.assertEqual(statistics["num_internal_hit"], 24) + + self.assertEqual(statistics["num_ancestor_hit_for_internals"], 27) + self.assertEqual(statistics["num_descendant_hit"], 15) + self.assertEqual(statistics["num_ancestor_hit_for_tips"], 44) + + self.assertEqual(statistics["num_internal_misses"], 2) + self.assertEqual(statistics["num_tip_misses"], 0) + + self.assertEqual(statistics["num_failed_classification"], 14) diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index 640f4d1418..26cf93d5ee 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ 
b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -8,12 +8,30 @@ #include "common/seq_tools/reverse_complement.hpp" #include "common/utils/string_utils.hpp" #include "common/logger.hpp" +#include "graph/representation/base/sequence_graph.hpp" namespace mtg { namespace annot { using mtg::common::logger; +bool TaxonomyBase::get_taxid_from_label(const std::string &label, TaxId *taxid) const { + if (label_type_ == TAXID) { + *taxid = std::stoul(utils::split_string(label, "|")[1]); + return true; + } else if (label_type_ == GEN_BANK) { + auto it_acc_version_taxid = accversion_to_taxid_map_.find(get_accession_version_from_label(label)); + if (it_acc_version_taxid == accversion_to_taxid_map_.end()) { + return false; + } + *taxid = it_acc_version_taxid->second; + return true; + } + + logger->error("Error: Could not get the taxid for label {}", label); + exit(1); +} + std::string TaxonomyBase::get_accession_version_from_label(const std::string &label) const { switch (label_type_) { case TAXID: @@ -131,49 +149,6 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenLi std::ifstream f(tax_tree_filepath); if (!f.good()) { logger->error("Error: Failed to open Taxonomic Tree file {}", tax_tree_filepath); -TaxonomyClsAnno::TaxonomyClsAnno(const graph::AnnotatedDBG &anno, - const double lca_coverage_rate, - const double kmers_discovery_rate, - const std::string &tax_tree_filepath, - const std::string &label_taxid_map_filepath) - : TaxonomyBase(lca_coverage_rate, kmers_discovery_rate), - _anno_matrix(&anno) { - if (!std::filesystem::exists(tax_tree_filepath)) { - logger->error("Can't open taxonomic tree file {}", tax_tree_filepath); - exit(1); - } - - bool require_accversion_to_taxid_map = assign_label_type(_anno_matrix->get_annotation().get_all_labels()[0]); - - Timer timer; - if (require_accversion_to_taxid_map) { - logger->trace("Parsing label_taxid_map file..."); - read_accversion_to_taxid_map(label_taxid_map_filepath, _anno_matrix); - 
logger->trace("Finished label_taxid_map file in {} sec", timer.elapsed()); - } - - timer.reset(); - logger->trace("Parsing taxonomic tree..."); - ChildrenList tree; - read_tree(tax_tree_filepath, &tree); - logger->trace("Finished taxonomic tree read in {} sec.", timer.elapsed()); - - timer.reset(); - logger->trace("Calculating tree statistics..."); - std::vector tree_linearization; - dfs_statistics(root_node, tree, &tree_linearization); - logger->trace("Finished tree statistics calculation in {} sec.", timer.elapsed()); - - timer.reset(); - logger->trace("Starting rmq preprocessing..."); - rmq_preprocessing(tree_linearization); - logger->trace("Finished rmq preprocessing in {} sec.", timer.elapsed()); -} - -void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenList *tree) { - std::ifstream f(tax_tree_filepath); - if (!f.good()) { - logger->error("Failed to open Taxonomic Tree file {}", tax_tree_filepath); exit(1); } @@ -219,7 +194,7 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenLi } assert(relevant_taxids.size()); - uint64_t num_taxid_failed = 0; // num_taxid_failed is used for logging only. + uint32_t num_taxid_failed = 0; // num_taxid_failed is used for logging only. for (uint32_t i = 0; i < relevant_taxids.size(); ++i) { TaxId taxid = relevant_taxids[i]; auto it_taxid_parent = node_parent_.find(taxid); @@ -309,49 +284,48 @@ void TaxonomyClsAnno::rmq_preprocessing(const std::vector &tree_lineariza } std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { - // num_kmers represents the total number of kmers parsed until the current time. + // num_kmers represents the total number of kmers. uint32_t num_kmers = 0; // 'kmer_idx' and 'kmer_val' are storing the indexes and values of all the nonzero kmers in the given read. 
- // The list of kmers, 'kmer_val', will be further sent to "matrix.getrows()" method; - // The list of indexes, 'kmer_idx', will be used to associate one row from "matrix.getrows()" with the corresponding kmer index. + // The list of kmers (kmer_val) will be further sent to "matrix.getrows()" method; + // The list of indexes (kmer_idx) will be used to link each row from "matrix.getrows()" to the corresponding kmer index. std::vector kmer_idx; std::vector kmer_val; if (sequence.size() >= std::numeric_limits::max()) { - logger->error("The given sequence contains more than 2^32 bp."); + logger->error("Error: The given sequence contains more than 2^32 bp."); exit(1); } - auto anno_graph = _anno_matrix->get_graph_ptr(); + std::shared_ptr anno_graph = anno_matrix_->get_graph_ptr(); anno_graph->map_to_nodes(sequence, [&](node_index i) { num_kmers++; - if (i <= 0 || i >= anno_graph->max_index()) { + if (!i || i >= anno_graph->max_index()) { return; } kmer_val.push_back(i - 1); kmer_idx.push_back(num_kmers - 1); }); - // Compute the LCA normalized taxid for each nonzero kmer in the given read. - const auto unique_matrix_rows = _anno_matrix->get_annotation().get_matrix().get_rows(kmer_val); + // Compute the LCA taxid for each nonzero kmer in the given read. + const auto unique_matrix_rows = anno_matrix_->get_annotation().get_matrix().get_rows(kmer_val); //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. if (kmer_val.size() != unique_matrix_rows.size()) { - throw std::runtime_error("Internal error: The tool doesn't know how to treat the case of " - "kmer duplications in the same read. Please contact the maintainers."); + throw std::runtime_error("Error: The current implementation doesn't work in case of multiple occurrences" + " of the same kmer in one read."); } if (unique_matrix_rows.size() >= std::numeric_limits::max()) { - throw std::runtime_error("Internal error: There must be less than 2^32 unique rows. 
" + throw std::runtime_error("Error: There must be less than 2^32 unique rows in one anno matrix query. " "Please reduce the query batch size."); } - - const auto &label_encoder = _anno_matrix->get_annotation().get_label_encoder(); + const auto &label_encoder = anno_matrix_->get_annotation().get_label_encoder(); TaxId taxid; - uint64_t cnt_kmer_idx = 0; + uint32_t curr_kmer_identifier = 0; std::vector curr_kmer_taxids; - std::vector seq_taxids(num_kmers); + std::vector lca_taxids(num_kmers); for (auto row : unique_matrix_rows) { for (auto cell : row) { @@ -361,16 +335,16 @@ std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_vie } if (curr_kmer_taxids.size() != 0) { if (not reversed) { - seq_taxids[kmer_idx[cnt_kmer_idx]] = find_lca(curr_kmer_taxids); + lca_taxids[kmer_idx[curr_kmer_identifier]] = find_lca(curr_kmer_taxids); } else { - seq_taxids[num_kmers - 1 - kmer_idx[cnt_kmer_idx]] = find_lca(curr_kmer_taxids); + lca_taxids[num_kmers - 1 - kmer_idx[curr_kmer_identifier]] = find_lca(curr_kmer_taxids); } } - cnt_kmer_idx++; + curr_kmer_identifier++; curr_kmer_taxids.clear(); } - return seq_taxids; + return lca_taxids; } TaxId TaxonomyBase::assign_class(const std::string &sequence) const { @@ -380,13 +354,15 @@ TaxId TaxonomyBase::assign_class(const std::string &sequence) const { reverse_complement(reversed_sequence.begin(), reversed_sequence.end()); std::vector backward_taxids = get_lca_taxids_for_seq(reversed_sequence, true); - tsl::hopscotch_map num_kmers_per_node; + tsl::hopscotch_map num_kmers_per_taxid; - // num_discovered_kmers represents the number of nonzero kmers according to at least of the forward and reversed read options. + // num_discovered_kmers represents the number of nonzero kmers according to the forward and/or reversed read. 
uint32_t num_discovered_kmers = 0; - const uint32_t num_total_kmers = forward_taxids.size(); + // num_total_kmers is equal to the total number of zero/nonzero kmers in the read: size_read - k + 1 + uint32_t num_total_kmers = forward_taxids.size(); - // Find the LCA taxid for each kmer without any dependency on the orientation of the read. + // Find the LCA taxid for each kmer without considering the orientation of the read. + // In case that both forward and reversed read have a nonzero kmer, then assign the LCA of those 2 kmers. for (uint32_t i = 0; i < num_total_kmers; ++i) { if (forward_taxids[i] == 0 && backward_taxids[i] == 0) { continue; @@ -410,98 +386,97 @@ TaxId TaxonomyBase::assign_class(const std::string &sequence) const { } if (curr_taxid) { num_discovered_kmers ++; - num_kmers_per_node[curr_taxid]++; + num_kmers_per_taxid[curr_taxid]++; } } - if (num_discovered_kmers <= _kmers_discovery_rate * num_total_kmers) { + if (num_discovered_kmers <= kmers_discovery_rate_ * num_total_kmers) { return 0; // 0 is a wildcard for not enough discovered kmers. } - tsl::hopscotch_set nodes_already_propagated; - tsl::hopscotch_map node_scores; + tsl::hopscotch_set taxid_already_propagated; + tsl::hopscotch_map taxid_scores; - uint32_t desired_number_kmers = num_discovered_kmers * _lca_coverage_rate; - TaxId best_lca = root_node; + uint32_t min_required_kmers = num_discovered_kmers * lca_coverage_rate_; + TaxId best_lca = root_node_; uint32_t best_lca_dist_to_root = 1; // Update the nodes' score by iterating through all the nodes with nonzero kmers. 
- for (const pair &node_pair : num_kmers_per_node) { - TaxId start_node = node_pair.first; - this->update_scores_and_lca(start_node, num_kmers_per_node, desired_number_kmers, &node_scores, - &nodes_already_propagated, &best_lca, &best_lca_dist_to_root); + for (const auto &[taxid, _] : num_kmers_per_taxid) { + this->update_scores_and_lca(taxid, num_kmers_per_taxid, min_required_kmers, &taxid_scores, + &taxid_already_propagated, &best_lca, &best_lca_dist_to_root); } return best_lca; } - -void TaxonomyBase::update_scores_and_lca(const TaxId start_node, - const tsl::hopscotch_map &num_kmers_per_node, - const uint64_t desired_number_kmers, - tsl::hopscotch_map *node_scores, - tsl::hopscotch_set *nodes_already_propagated, +void TaxonomyBase::update_scores_and_lca(TaxId start_taxid, + const tsl::hopscotch_map &num_kmers_per_taxid, + uint32_t min_required_kmers, + tsl::hopscotch_map *taxid_scores, + tsl::hopscotch_set *taxid_already_propagated, TaxId *best_lca, uint32_t *best_lca_dist_to_root) const { - if (nodes_already_propagated->count(start_node)) { + if (taxid_already_propagated->count(start_taxid)) { return; } - uint64_t score_from_processed_parents = 0; - uint64_t score_from_unprocessed_parents = num_kmers_per_node.at(start_node); + uint32_t score_from_processed_parents = 0; + uint32_t score_from_unprocessed_parents = num_kmers_per_taxid.at(start_taxid); - // processed_parents represents the set of nodes on the path start_node->root that have already been processed in the previous iterations. + // processed_parents represents the set of nodes on the path start_taxid->root that have already been processed in the previous iterations. 
std::vector processed_parents; std::vector unprocessed_parents; - TaxId act_node = start_node; + TaxId act_node = start_taxid; unprocessed_parents.push_back(act_node); - while (act_node != root_node) { - act_node = node_parent.at(act_node); - if (!nodes_already_propagated->count(act_node)) { - if (num_kmers_per_node.count(act_node)) { - score_from_unprocessed_parents += num_kmers_per_node.at(act_node); + while (act_node != root_node_) { + act_node = node_parent_.at(act_node); + auto num_kmers_act_node_it = num_kmers_per_taxid.find(act_node); + if (!taxid_already_propagated->count(act_node)) { + if (num_kmers_act_node_it != num_kmers_per_taxid.end()) { + score_from_unprocessed_parents += num_kmers_act_node_it->second; } unprocessed_parents.push_back(act_node); } else { - if (num_kmers_per_node.count(act_node)) { - score_from_processed_parents += num_kmers_per_node.at(act_node); + if (num_kmers_act_node_it != num_kmers_per_taxid.end()) { + score_from_processed_parents += num_kmers_act_node_it->second; } processed_parents.push_back(act_node); } } // The score of all the nodes in 'processed_parents' will be updated with 'score_from_unprocessed_parents' only. // The nodes in 'unprocessed_parents' will be updated with the sum 'score_from_processed_parents + score_from_unprocessed_parents'. - for (uint64_t i = 0; i < unprocessed_parents.size(); ++i) { + for (uint32_t i = 0; i < unprocessed_parents.size(); ++i) { TaxId &act_node = unprocessed_parents[i]; - (*node_scores)[act_node] = + (*taxid_scores)[act_node] = score_from_processed_parents + score_from_unprocessed_parents; - nodes_already_propagated->insert(act_node); + taxid_already_propagated->insert(act_node); - uint64_t act_dist_to_root = + uint32_t act_dist_to_root = processed_parents.size() + unprocessed_parents.size() - i; // Test if the current node's score would be a better LCA result. 
- if ((*node_scores)[act_node] >= desired_number_kmers + if ((*taxid_scores)[act_node] >= min_required_kmers && (act_dist_to_root > *best_lca_dist_to_root || (act_dist_to_root == *best_lca_dist_to_root - && (*node_scores)[act_node] > (*node_scores)[*best_lca]) - ) - ) { + && (*taxid_scores)[act_node] > (*taxid_scores)[*best_lca]) + ) + ) { *best_lca = act_node; *best_lca_dist_to_root = act_dist_to_root; } } - for (uint64_t i = 0; i < processed_parents.size(); ++i) { + for (uint32_t i = 0; i < processed_parents.size(); ++i) { TaxId &act_node = processed_parents[i]; - (*node_scores)[act_node] += score_from_unprocessed_parents; + (*taxid_scores)[act_node] += score_from_unprocessed_parents; - uint64_t act_dist_to_root = processed_parents.size() - i; - if ((*node_scores)[act_node] >= desired_number_kmers + uint32_t act_dist_to_root = processed_parents.size() - i; + if ((*taxid_scores)[act_node] >= min_required_kmers && (act_dist_to_root > *best_lca_dist_to_root || (act_dist_to_root == *best_lca_dist_to_root - && (*node_scores)[act_node] > (*node_scores)[*best_lca]) - ) - ) { + && (*taxid_scores)[act_node] > (*taxid_scores)[*best_lca]) + ) + ) { *best_lca = act_node; *best_lca_dist_to_root = act_dist_to_root; } @@ -510,47 +485,67 @@ void TaxonomyBase::update_scores_and_lca(const TaxId start_node, TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { if (taxids.empty()) { - logger->error("Internal error: Can't find LCA for an empty set of normalized taxids."); + logger->error("Error: Can't find LCA for an empty set of normalized taxids."); exit(1); } - uint64_t left_idx = node_to_linearization_idx.at(taxids[0]); - uint64_t right_idx = node_to_linearization_idx.at(taxids[0]); + uint32_t left_idx = node_to_linearization_idx_.at(taxids[0]); + uint32_t right_idx = node_to_linearization_idx_.at(taxids[0]); for (const TaxId &taxid : taxids) { - if (node_to_linearization_idx.at(taxid) < left_idx) { - left_idx = node_to_linearization_idx.at(taxid); + uint32_t 
curr_idx = node_to_linearization_idx_.at(taxid); + if (curr_idx < left_idx) { + left_idx = curr_idx; } - if (node_to_linearization_idx.at(taxid) > right_idx) { - right_idx = node_to_linearization_idx.at(taxid); + if (curr_idx > right_idx) { + right_idx = curr_idx; } } // The node with maximum node_depth in 'linearization[left_idx : right_idx+1]' is the LCA of the given set. // Find the maximum node_depth between the 2 overlapping intervals of size 2^log_dist. uint32_t log_dist = sdsl::bits::hi(right_idx - left_idx); - if (rmq_data.size() <= log_dist) { - logger->error("Internal error: the RMQ was not precomputed before the LCA queries."); + if (rmq_data_.size() <= log_dist) { + logger->error("Error: the RMQ was not precomputed before the LCA queries."); exit(1); } - uint32_t left_lca = rmq_data[log_dist][left_idx]; - uint32_t right_lca = rmq_data[log_dist][right_idx - (1 << log_dist) + 1]; + uint32_t left_lca = rmq_data_[log_dist][left_idx]; + uint32_t right_lca = rmq_data_[log_dist][right_idx - (1 << log_dist) + 1]; - if (node_depth.at(left_lca) > node_depth.at(right_lca)) { + if (node_depth_.at(left_lca) > node_depth_.at(right_lca)) { return left_lca; } return right_lca; } -std::vector TaxonomyClsImportDB::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { - cerr << "Assign class not implemented reversed = " << reversed << "\n"; - throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsImportDB not implemented. Received seq size" - + to_string(sequence.size())); -} +TaxId TaxonomyClsAnno::assign_class_toplabels(const std::string &sequence, double label_fraction) const { + // Get all the labels with a frequency higher than 'label_fraction' among the kmers in the forward read. 
+ std::vector labels_discovered = anno_matrix_->get_labels(sequence, label_fraction); + + std::string reversed_sequence = sequence; + reverse_complement(reversed_sequence.begin(), reversed_sequence.end()); + // Get all the labels with a frequency higher than 'label_fraction' among the kmers in the reversed read. + std::vector labels_discovered_rev = anno_matrix_->get_labels(reversed_sequence, label_fraction); -TaxId TaxonomyClsImportDB::find_lca(const std::vector &taxids) const { - throw std::runtime_error("find_lca TaxonomyClsImportDB not implemented. Received taxids size" - + to_string(taxids.size())); + // Usually, only one of the two sets ('labels_discovered', 'labels_discovered_rev') will be nonempty. + + std::vector curr_taxids; + for (uint32_t i = 0; i < labels_discovered.size(); ++i) { + TaxId act; + if(get_taxid_from_label(labels_discovered[i], &act)) { + curr_taxids.push_back(act); + } + } + for (uint32_t i = 0; i < labels_discovered_rev.size(); ++i) { + TaxId act; + if(get_taxid_from_label(labels_discovered_rev[i], &act)) { + curr_taxids.push_back(act); + } + } + if (curr_taxids.size() == 0) { + return 0; // Wildcard for not being able to assign a taxid. + } + return find_lca(curr_taxids); } } // namespace annot diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.hpp b/metagraph/src/annotation/taxonomy/tax_classifier.hpp index 0dd97dd3cd..17f6be2c70 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.hpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.hpp @@ -27,10 +27,19 @@ class TaxonomyBase { : lca_coverage_rate_(lca_coverage_rate), kmers_discovery_rate_(kmers_discovery_rate) {} virtual ~TaxonomyBase() {} + TaxId assign_class(const std::string &sequence) const; protected: + virtual TaxId find_lca(const std::vector &taxids) const = 0; + std::string get_accession_version_from_label(const std::string &label) const; + /** Parse a given label in order to return the associated taxid. 
+ * + * @param [input] 'label' -> to get taxid from + * @param [output] 'taxid' -> save the found taxid + * @return true only if the taxid search was successful. + */ bool get_taxid_from_label(const std::string &label, TaxId *taxid) const; /** Reads the accession version to taxid lookup table. @@ -42,6 +51,33 @@ class TaxonomyBase { */ void read_accversion_to_taxid_map(const std::string &filepath, const graph::AnnotatedDBG *anno_matrix = NULL); + /** + * Update the current taxid_scores and best_lca by taking into account the weight of start_taxid and all its ancestors. + * + * @param [input] 'start_taxid' -> the starting taxnode to update 'taxid_scores'. + * @param [input] 'num_kmers_per_taxid[taxid]' -> the number of kmers 'k' with taxonomic_map[k]=taxid. + * @param [input] 'min_required_kmers' -> the threshold score representing the minimal number of kmers that have to be linked + * to a certain (potential solution) taxnode/LCA, its subtree or its ancestors. + * @param [modified] 'taxid_scores' -> the current score for each taxnode in the taxonomic tree. + * @param [modified] 'taxid_already_propagated' -> the set of taxnodes that were previously processed. + * @param [modified] 'best_lca' -> the current classification prediction (taxnode that exceeds the `min_required_kmers` + * threshold and is placed as close as possible to the leaves). + * @param [modified] 'best_lca_dist_to_root' -> the distance to the root for the current classification prediction. + */ + void update_scores_and_lca(TaxId start_taxid, + const tsl::hopscotch_map &num_kmers_per_taxid, + uint32_t min_required_kmers, + tsl::hopscotch_map *taxid_scores, + tsl::hopscotch_set *taxid_already_propagated, + TaxId *best_lca, + uint32_t *best_lca_dist_to_root) const; + + /** + * Get the list of LCA taxids associated to each kmer in a given sequence. + * The sequence can be given in forward or in reversed orientation. 
+ */ + virtual std::vector get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const = 0; + LabelType label_type_; /** @@ -82,9 +118,9 @@ class TaxonomyClsAnno : public TaxonomyBase { const std::string &label_taxid_map_filepath = ""); TaxonomyClsAnno() {} - TaxId assign_class_toplabels(const std::string &sequence, const double label_fraction) const; + TaxId assign_class_toplabels(const std::string &sequence, double label_fraction) const; - private: + private: /** * Reads and returns the taxonomic tree as a list of children. * diff --git a/metagraph/src/cli/config/config.cpp b/metagraph/src/cli/config/config.cpp index 987a0b965a..7bbdd0ca88 100644 --- a/metagraph/src/cli/config/config.cpp +++ b/metagraph/src/cli/config/config.cpp @@ -73,6 +73,8 @@ Config::Config(int argc, char *argv[]) { identity = ASSEMBLE; } else if (!strcmp(argv[1], "relax_brwt")) { identity = RELAX_BRWT; + } else if (!strcmp(argv[1], "tax_class")) { + identity = TAX_CLASS; } else if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { print_welcome_message(); print_usage(argv[0]); @@ -276,6 +278,16 @@ Config::Config(int argc, char *argv[]) { anno_labels_delimiter = std::string(get_value(i++)); } else if (!strcmp(argv[i], "--separately")) { separately = true; + } else if (!strcmp(argv[i], "--taxonomic-tree")) { + taxonomic_tree = std::string(get_value(i++)); + } else if (!strcmp(argv[i], "--taxonomic-db")) { + taxonomic_db = std::string(get_value(i++)); + } else if (!strcmp(argv[i], "--min-lca-coverage")) { + min_lca_coverage = std::stof(get_value(i++)); + } else if (!strcmp(argv[i], "--top-label-fraction")) { + top_label_fraction = std::stof(get_value(i++)); + } else if (!strcmp(argv[i], "--label-taxid-map")) { + label_taxid_map = std::string(get_value(i++)); } else if (!strcmp(argv[i], "--num-top-labels")) { num_top_labels = atoi(get_value(i++)); } else if (!strcmp(argv[i], "--port")) { @@ -869,6 +881,8 @@ void Config::print_usage(const std::string &prog_name, 
IdentityType identity) { fprintf(stderr, "\tquery\t\tannotate sequences from fast[a|q] files\n\n"); fprintf(stderr, "\tserver_query\tannotate received sequences and send annotations back\n\n"); + fprintf(stderr, "\ttax_class \tclassify sequences according to the taxonomic hierarchy\n"); + return; } case BUILD: { @@ -1258,6 +1272,23 @@ void Config::print_usage(const std::string &prog_name, IdentityType identity) { fprintf(stderr, "\t-p --parallel [INT] \tmaximum number of parallel connections [1]\n"); // fprintf(stderr, "\t --cache-size [INT] \tnumber of uncompressed rows to store in the cache [0]\n"); } break; + case TAX_CLASS: { + fprintf(stderr, "Usage: %s tax_class [options]\n" + "\t\t -i FILE1 [[FILE2] ...]\n" + "\t\t -a --taxonomic_tree \n" + "\t\t [--label-taxid-map <*.accession2taxid>]" + "\tEach input file is given in FASTA or FASTQ format.\n\n", prog_name.c_str()); + + fprintf(stderr, "Available options for taxonomic classification:\n"); + fprintf(stderr, "\t --min-lca-coverage [FLOAT] \tminimal fraction of kmers that have to be linked\n" + "\t\t\t\t\t\tto a certain (potential solution) taxnode/LCA, its subtree or its ancestors [0.66]\n"); + fprintf(stderr, "\t-p --parallel [INT] \t\t\tuse multiple threads for computation [1]\n"); + fprintf(stderr, "\t --discovery-fraction [FLOAT] \tminimal fraction of labeled k-mers required\n" + "\t\t\t\t\t\tfor a valid annotation [0.7]\n"); + fprintf(stderr, "\t --top-label-fraction [FLOAT] \tif greater than 0, use a faster taxonomic\n" + "\t\t\t\t\t\tclassification algorithm which returns the LCA of all the labels linked to\n" + "\t\t\t\t\t\tat least 'top_label_fraction' percent of the existent kmers [0]\n"); + } break; } fprintf(stderr, "\n\tGeneral options:\n"); diff --git a/metagraph/src/cli/config/config.hpp b/metagraph/src/cli/config/config.hpp index 801e63390d..531f1730f1 100644 --- a/metagraph/src/cli/config/config.hpp +++ b/metagraph/src/cli/config/config.hpp @@ -135,6 +135,8 @@ class Config { double 
alignment_max_nodes_per_seq_char = 12.0; double alignment_max_ram = 200; double alignment_min_exact_match = 0.0; + double min_lca_coverage = 0.66; + double top_label_fraction = 0; double min_fraction = 0.0; double max_fraction = 1.0; std::vector count_slice_quantiles; @@ -155,6 +157,9 @@ class Config { std::string fasta_anno_comment_delim = UNINITIALIZED_STR; std::string header = ""; std::string host_address; + std::string taxonomic_tree; + std::string taxonomic_db; + std::string label_taxid_map; std::string linkage_file; std::string intersected_columns; @@ -181,6 +186,7 @@ class Config { RELAX_BRWT, QUERY, SERVER_QUERY, + TAX_CLASS, }; IdentityType identity = NO_IDENTITY; diff --git a/metagraph/src/cli/tax_class.cpp b/metagraph/src/cli/tax_class.cpp new file mode 100644 index 0000000000..d0dccf8410 --- /dev/null +++ b/metagraph/src/cli/tax_class.cpp @@ -0,0 +1,142 @@ +#include "tax_class.hpp" + +#include "annotation/taxonomy/tax_classifier.hpp" +#include "common/threads/threading.hpp" +#include "common/unix_tools.hpp" +#include "config/config.hpp" +#include "load/load_graph.hpp" +#include "load/load_annotated_graph.hpp" +#include "seq_io/sequence_io.hpp" + +#include "common/logger.hpp" + +namespace mtg { +namespace cli { + +using mtg::common::logger; + +const uint32_t QUERY_SEQ_BATCH_SIZE = 100000; + +void append_new_result(const std::string &seq_label, + const uint32_t taxid, + std::vector > *pair_label_taxid, + std::mutex *tax_mutex) { + std::scoped_lock guard(*tax_mutex); + (*pair_label_taxid).emplace_back(seq_label, taxid); +} + +void print_all_results(const std::vector > &pair_label_taxid, + const std::function &callback) { + for (const std::pair &label_taxid : pair_label_taxid) { + callback(label_taxid.first, label_taxid.second); + } +} + +void execute_fasta_file(const string &file, + std::function > &)> &callback) { + logger->trace("Parsing query sequences from file {}.", file); + + seq_io::FastaParser fasta_parser(file); + std::vector > seq_batch; + + for 
(const seq_io::kseq_t &kseq : fasta_parser) { + seq_batch.push_back({std::string(kseq.seq.s), std::string(kseq.name.s)}); + + if (seq_batch.size() != QUERY_SEQ_BATCH_SIZE) { + continue; + } + callback(seq_batch); + + logger->trace("Processing an another bucket of {} queries from file {}.", QUERY_SEQ_BATCH_SIZE, file); + seq_batch.clear(); + } + callback(seq_batch); +} + +int taxonomic_classification(Config *config) { + assert(config); + + const std::vector &files = config->fnames; + + Timer timer; + logger->trace("Graph loading..."); + auto graph = load_critical_dbg(config->infbase); + logger->trace("Finished graph loading after {} sec.", timer.elapsed()); + + timer.reset(); + logger->trace("Processing the classification..."); + ThreadPool thread_pool(std::max(1u, get_num_threads()) - 1, 1000); + + std::function > &)> callback; + + std::vector > pair_label_taxid; + std::mutex tax_mutex; + + std::unique_ptr taxonomy; + std::unique_ptr anno_graph; + + if (config->taxonomic_db != "") { + //todo implement + throw std::runtime_error("Error: taxonomic classification with taxDB is not implemented."); + } else { + // Use tax_class without any precomputed database. 
+ if (config->infbase_annotators.size() == 0) { + logger->error("Error: The annotation matrix is missing from the command line, " + "please use '-a' flag for the annotation matrix filepath."); + std::exit(1); + } + timer.reset(); + logger->trace("Graph and Annotation loading..."); + graph = load_critical_dbg(config->infbase); + anno_graph = initialize_annotated_dbg(graph, *config); + logger->trace("Finished graph annotation loading after {} sec.", timer.elapsed()); + + timer.reset(); + logger->trace("Constructing TaxonomyClsAnno..."); + taxonomy = std::make_unique(*anno_graph, config->taxonomic_tree, + config->min_lca_coverage, config->discovery_fraction, + config->label_taxid_map); + logger->trace("Finished TaxonomyDB construction after {} sec.", timer.elapsed()); + + if (config->top_label_fraction > 0) { + // Use tax_class version which is returning the LCA of the top labels among the kmers. + // This version is fast, but less precise. + callback = [&](const std::vector > &seq_batch){ + thread_pool.enqueue([&](std::vector > sequences){ + for (std::pair &seq : sequences) { + append_new_result(seq.second, taxonomy->assign_class_toplabels( + seq.first, config->top_label_fraction), &pair_label_taxid, &tax_mutex); + } + }, std::move(seq_batch)); + }; + } else { + // Use tax_class version which computes the LCA taxid for each kmer. This version will produce the best + // prediction rate (identical to the one using taxdb, but the computation will be slightly slower). 
+ callback = [&](const std::vector > &seq_batch){ + thread_pool.enqueue([&](std::vector > sequences){ + for (std::pair &seq : sequences) { + append_new_result(seq.second, taxonomy->assign_class(seq.first), &pair_label_taxid, &tax_mutex); + } + }, std::move(seq_batch)); + }; + } + } + + for (const std::string &file : files) { + execute_fasta_file(file, callback); + } + thread_pool.join(); + + print_all_results(pair_label_taxid, [](const std::string name_seq, const uint32_t &taxid) { + std::string result = fmt::format( + "Sequence '{}' was classified with Tax ID '{}'\n", + name_seq, taxid); + std::cout << result << std::endl; + }); + + logger->trace("Finished all the queries in {} sec.", timer.elapsed()); + return 0; +} + +} // namespace cli +} // namespace mtg diff --git a/metagraph/src/cli/tax_class.hpp b/metagraph/src/cli/tax_class.hpp new file mode 100644 index 0000000000..d00084aa35 --- /dev/null +++ b/metagraph/src/cli/tax_class.hpp @@ -0,0 +1,14 @@ +#ifndef __TAX_CLASSIFY_HPP__ +#define __TAX_CLASSIFY_HPP__ + +namespace mtg { +namespace cli { + +class Config; + +int taxonomic_classification(Config *config); + +} // namespace cli +} // namespace mtg + +#endif // __TAX_CLASSIFY_HPP__ diff --git a/metagraph/src/main.cpp b/metagraph/src/main.cpp index 99f7706b3f..b69b5fa77c 100644 --- a/metagraph/src/main.cpp +++ b/metagraph/src/main.cpp @@ -15,6 +15,7 @@ #include "cli/server.hpp" #include "cli/transform_graph.hpp" #include "cli/transform_annotation.hpp" +#include "cli/tax_class.hpp" using namespace mtg; using mtg::common::logger; @@ -86,6 +87,9 @@ int main(int argc, char *argv[]) { case Config::ALIGN: return cli::align_to_graph(config.get()); + case Config::TAX_CLASS: + return cli::taxonomic_classification(config.get()); + case Config::NO_IDENTITY: assert(false); } diff --git a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp index 0430f644b9..84d2bdaabb 100644 --- 
a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp +++ b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp @@ -2,8 +2,7 @@ #include #include -#include "seq_io/sequence_io.hpp" -#include "../test_annotated_dbg_helpers.hpp" +#include #define private public #define protected public @@ -89,8 +88,8 @@ TEST(TaxonomyTest, ClsAnno_RmqPreprocessing) { EXPECT_EQ(expected_rmq, tax->rmq_data_); } -TEST (TaxonomyTest, ClsAnno_FindLca) { - mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); +TEST(TaxonomyTest, ClsAnno_FindLca) { + std::unique_ptr tax = std::make_unique(); /* * Tree configuration: * node 0 -> 1 2 3 @@ -100,34 +99,34 @@ TEST (TaxonomyTest, ClsAnno_FindLca) { * node 4 -> 7 8 */ - tax->rmq_data = { - {0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0}, - {0, 1, 4, 4, 4, 4, 1, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0}, - {0, 1, 4, 4, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + tax->rmq_data_ = { + {0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0}, + {0, 1, 4, 4, 4, 4, 1, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0}, + {0, 1, 4, 4, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; - tax->node_to_linearization_idx = { - {0, 0}, - {1, 1}, - {2, 11}, - {3, 13}, - {4, 2}, - {5, 8}, - {6, 14}, - {7, 3}, - {8, 5}, + tax->node_to_linearization_idx_ = { + {0, 0}, + {1, 1}, + {2, 11}, + {3, 13}, + {4, 2}, + {5, 8}, + {6, 14}, + {7, 3}, + {8, 5}, }; - tax->node_depth = { - {0, 4}, - {1, 3}, - {2, 1}, - {3, 2}, - {4, 2}, - {5, 1}, - {6, 1}, - {7, 1}, - {8, 1}, + tax->node_depth_ = { + {0, 4}, + {1, 3}, + {2, 1}, + {3, 2}, + {4, 2}, + {5, 1}, + {6, 1}, + {7, 1}, + {8, 1}, }; struct query_lca { @@ -137,45 +136,46 @@ TEST (TaxonomyTest, ClsAnno_FindLca) { }; std::vector queries = { - {"test1", 0, {7, 6}}, - {"test2", 0, {1, 2}}, - {"test3", 0, {3, 4}}, - {"test4", 0, {1, 2, 
5, 6}}, - {"test5", 2, {2}}, - {"test6", 3, {3, 6}}, - {"test6b", 3, {6, 3}}, - {"test7", 1, {7, 8, 5}}, - {"test8", 1, {4, 5}}, - {"test9", 4, {7, 8}}, - {"test10", 0, {0, 1, 2, 3, 4, 5, 6, 7, 8}}, + {"test1", 0, {7, 6}}, + {"test2", 0, {1, 2}}, + {"test3", 0, {3, 4}}, + {"test4", 0, {1, 2, 5, 6}}, + {"test5", 2, {2}}, + {"test6", 3, {3, 6}}, + {"test6b", 3, {6, 3}}, + {"test7", 1, {7, 8, 5}}, + {"test8", 1, {4, 5}}, + {"test9", 4, {7, 8}}, + {"test10", 0, {0, 1, 2, 3, 4, 5, 6, 7, 8}}, }; for(const auto &it: queries) { - EXPECT_EQ(make_pair(it.test_id, it.expected), + EXPECT_EQ(make_pair(it.test_id, it.expected), make_pair(it.test_id, tax->find_lca(it.nodes))); } } -TEST (TaxonomyTest, ClsAnno_ClassifierUpdateScoresAndLca) { +TEST(TaxonomyTest, ClsAnno_ClassifierUpdateScoresAndLca) { mtg::annot::TaxonomyClsAnno tax_classifier; - tax_classifier.root_node = 1; - tax_classifier.node_parent = { {1, 1}, - {2, 1}, {3, 1}, - {4, 3}, {5, 3}, - {6, 4}, {7, 4} - }; + tax_classifier.root_node_ = 1; + tax_classifier.node_parent_ = { {1, 1}, + {2, 1}, {3, 1}, + {4, 3}, {5, 3}, + {6, 4}, {7, 4} + }; - tsl::hopscotch_map num_kmers_per_node = { - {1, 20}, {2, 1}, {3, 15}, {4, 25}, {5, 6}, {6, 15}, {7, 3} // leaves 2, 7 and 5 have a smaller number of kmers. + tsl::hopscotch_map num_kmers_per_node = { + {1, 20}, {2, 1}, {3, 15}, {4, 25}, {5, 6}, {6, 15}, {7, 3} + // The leaves with ids 2, 7 and 5 have a smaller number of kmers. 
}; struct query_tax_map_update { std::string test_id; std::string description; - uint64_t desired_number_kmers; - vector> ordered_node_sets; - tsl::hopscotch_map expected_node_scores; + uint32_t min_required_kmers; + std::vector> ordered_node_sets; + tsl::hopscotch_map expected_node_scores; tsl::hopscotch_set expected_nodes_already_propagated; uint32_t expected_best_lca; uint32_t expected_best_lca_dist_to_root; @@ -183,21 +183,21 @@ TEST (TaxonomyTest, ClsAnno_ClassifierUpdateScoresAndLca) { // All the lists in `ordered_node_sets` are covering the entire taxonomic tree. // Thus, the evaluation of `update_scores_and_lca` on any of those sets should return the same results. - vector> ordered_node_sets = { - {1, 2, 3, 4, 5, 6, 7}, - {7, 6, 5, 4, 3, 2, 1}, - {7, 4, 6, 3, 5, 1, 2}, - {4, 6, 7, 3, 5, 1, 2}, - {2, 5, 4, 6, 7, 3, 1}, - {2, 6, 7, 5}, - {6, 7, 5, 2}, - {6, 7, 5, 2, 1}, - {3, 5, 6, 7, 2} + std::vector> ordered_node_sets = { + {1, 2, 3, 4, 5, 6, 7}, + {7, 6, 5, 4, 3, 2, 1}, + {7, 4, 6, 3, 5, 1, 2}, + {4, 6, 7, 3, 5, 1, 2}, + {2, 5, 4, 6, 7, 3, 1}, + {2, 6, 7, 5}, + {6, 7, 5, 2}, + {6, 7, 5, 2, 1}, + {3, 5, 6, 7, 2} }; std::vector tests = { { "test1", - "desired_number_kmers is equal to node_score[6]; expect LCA taxid = 6", + "min_required_kmers is equal to node_score[6]; expect LCA taxid = 6", 75, ordered_node_sets, {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, @@ -206,81 +206,81 @@ TEST (TaxonomyTest, ClsAnno_ClassifierUpdateScoresAndLca) { 4 }, { "test2", - "desired_number_kmers is equal to node_score[6]+1; expect LCA taxid = 4", - 76, - ordered_node_sets, - {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, - {1, 2, 3, 4, 5, 6, 7}, - 4, - 3 + "min_required_kmers is equal to node_score[6]+1; expect LCA taxid = 4", + 76, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 4, + 3 }, { "test3", - "desired_number_kmers is equal to node_score[4]+1; expect LCA taxid = 
3", - 79, - ordered_node_sets, - {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, - {1, 2, 3, 4, 5, 6, 7}, - 3, - 2 + "min_required_kmers is equal to node_score[4]+1; expect LCA taxid = 3", + 79, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 3, + 2 }, { "test4", - "desired_number_kmers is equal to node_score[3]+1; expect LCA taxid = 1", - 85, - ordered_node_sets, - {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, - {1, 2, 3, 4, 5, 6, 7}, - 1, - 1 + "min_required_kmers is equal to node_score[3]+1; expect LCA taxid = 1", + 85, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 1, + 1 }, { "test5", - "Check updated scores after processing only node 4", - 100, - {{4}}, - {{4, 60}, {3, 60}, {1, 60}}, - {1, 3, 4}, - 1, - 1 + "Check updated scores after processing only node 4", + 100, + {{4}}, + {{4, 60}, {3, 60}, {1, 60}}, + {1, 3, 4}, + 1, + 1 }, { "test6", - "Check updated scores after processing only the nodes 4 and 6", - 100, - {{4, 6}, {6, 4}}, - {{6, 75}, {4, 75}, {3, 75}, {1, 75}}, - {1, 3, 4, 6}, - 1, - 1 + "Check updated scores after processing only the nodes 4 and 6", + 100, + {{4, 6}, {6, 4}}, + {{6, 75}, {4, 75}, {3, 75}, {1, 75}}, + {1, 3, 4, 6}, + 1, + 1 }, { "test7", - "Check updated scores after processing only the nodes 7 and 5", - 100, - {{7, 5}, {5, 7}}, - {{7, 63}, {4, 63}, {5, 41}, {3, 69}, {1, 69}}, - {1, 3, 4, 5, 7}, - 1, - 1 + "Check updated scores after processing only the nodes 7 and 5", + 100, + {{7, 5}, {5, 7}}, + {{7, 63}, {4, 63}, {5, 41}, {3, 69}, {1, 69}}, + {1, 3, 4, 5, 7}, + 1, + 1 }, { "test8", - "Check updated scores after processing only the nodes 2, 6 and 7", - 100, - {{2, 6, 7}, {2, 7, 6}, {6, 2, 7}, {6, 7, 2}, {7, 2, 6}, {7, 6, 2}}, - {{1, 79}, {2, 21}, {3, 78}, {4, 78}, {6, 75}, {7, 63}}, - {1, 2, 3, 4, 6, 7}, - 1, - 1 + "Check updated scores after processing only the 
nodes 2, 6 and 7", + 100, + {{2, 6, 7}, {2, 7, 6}, {6, 2, 7}, {6, 7, 2}, {7, 2, 6}, {7, 6, 2}}, + {{1, 79}, {2, 21}, {3, 78}, {4, 78}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 6, 7}, + 1, + 1 }, }; for (const auto &test: tests) { for (std::vector nodes_set : test.ordered_node_sets) { tsl::hopscotch_set nodes_already_propagated; - tsl::hopscotch_map node_scores; - uint32_t best_lca = tax_classifier.root_node; + tsl::hopscotch_map node_scores; + uint32_t best_lca = tax_classifier.root_node_; uint32_t best_lca_dist_to_root = 1; - for (uint64_t node: nodes_set) { - tax_classifier.update_scores_and_lca(node, num_kmers_per_node, test.desired_number_kmers, - &node_scores, &nodes_already_propagated, - &best_lca, &best_lca_dist_to_root); + for (uint32_t node: nodes_set) { + tax_classifier.update_scores_and_lca(node, num_kmers_per_node, test.min_required_kmers, + &node_scores, &nodes_already_propagated, + &best_lca, &best_lca_dist_to_root); } EXPECT_EQ(make_pair(test.test_id, test.expected_node_scores), diff --git a/metagraph/tests/data/taxonomic_data/dumb.accession2taxid b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid new file mode 100644 index 0000000000..ca9ba2566f --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid @@ -0,0 +1,21 @@ +accession accession.version taxid gi +NC_01 NC_01.1 10001 10001 +NC_02 NC_02.1 10002 10002 +NC_03 NC_04.1 10003 10003 +NC_04 NC_04.1 10004 10004 +NC_05 NC_05.1 10005 10005 +NC_06 NC_06.1 10006 10006 +NC_07 NC_07.1 10007 10007 +NC_08 NC_08.1 10008 10008 +NC_09 NC_09.1 10009 10009 +NC_10 NC_10.1 10010 10010 +NC_11 NC_11.1 10011 10011 +NC_12 NC_12.1 10012 10012 +NC_13 NC_13.1 10013 10013 +NC_14 NC_14.1 10014 10014 +NC_15 NC_15.1 10015 10015 +NC_16 NC_16.1 10016 10016 +NC_17 NC_17.1 10017 10017 +NC_18 NC_18.1 10018 10018 +NC_19 NC_19.1 10019 10019 +NC_20 NC_20.1 10020 10020 diff --git a/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp new file mode 100644 index 
0000000000..c721ad085a --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp @@ -0,0 +1,20 @@ +10001 | 10001 +10002 | 10001 +10003 | 10001 +10004 | 10002 +10005 | 10002 +10006 | 10002 +10007 | 10003 +10008 | 10003 +10009 | 10004 +10010 | 10004 +10011 | 10004 +10012 | 10005 +10013 | 10005 +10014 | 10006 +10015 | 10006 +10016 | 10007 +10017 | 10007 +10018 | 10007 +10019 | 10008 +10020 | 10008 diff --git a/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa new file mode 100644 index 0000000000..36099686d8 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa @@ -0,0 +1,161 @@ +>gi|10001|ref|NC_01.1| Test sample 1 (root) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACGAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10002|ref|NC_02.1| Test sample 2 (dist to root = 1) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10003|ref|NC_03.1| Test sample 3 (dist to root = 1) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG 
+AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10004|ref|NC_04.1| Test sample 4 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10005|ref|NC_05.1| Test sample 5 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10006|ref|NC_06.1| Test sample 6 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT 
+TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10007|ref|NC_07.1| Test sample 7 (dist to root = 2) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10008|ref|NC_08.1| Test sample 8 (dist to root = 2) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG 
+AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA +TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) +CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT +CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT 
+TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC +>gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG 
+AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT +TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) +CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT 
+TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC + diff --git a/metagraph/tests/data/taxonomic_data/tax_input.fa b/metagraph/tests/data/taxonomic_data/tax_input.fa new file mode 100644 index 0000000000..3ba4796416 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/tax_input.fa @@ -0,0 +1,96 @@ +>gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA +TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT 
+TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) +CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT +CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC +>gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG 
+AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT 
+TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) +CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC diff --git a/metagraph/tests/data/taxonomic_data/tax_query.fa b/metagraph/tests/data/taxonomic_data/tax_query.fa new file mode 100644 index 0000000000..30ea3f2c27 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/tax_query.fa @@ -0,0 +1,800 @@ +@gi|10001|ref|NC_01.1|-9/1 +TGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATA ++ +CC=GGGCGGGGGGGJGJJGGJJJCJJJGJJCJJGGJJG=GGJJJGGC8GGGCCGCGCJJGGJ=1CGGJGGCJG=GGGJGGCCGGGCCGCGCGGGG=GGGGCCGGCGGGGGGG=GCGGGC1CGGCGCCGGCC8GG1GCGGGGGGGCGC==C +@gi|10001|ref|NC_01.1|-7/1 
+CCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGA ++ +CC8GGGGGGGGGGJCJJGGJJJJJGC(JJGCJJGJGGJJJCGGGJJJJJ=CCJJ8CJ8CGCC=GGJJGGGGGGJGGGGCCGGCGCCGCGGGG1G=CGGGCJCGCGGC1GGCCGGGCGGGCGCGGCGGGGG=CCGGGGGG=CGCCGGGCCG +@gi|10001|ref|NC_01.1|-5/1 +AGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTCGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTC ++ +C=CGGGGGG=CGGJGJ1JJJGJJCGJJJG=JGC=JCJGJJGJJCGJJGGJJJGJJGGGGJG=CGCCGGC=GGGCGGGG8CCGGGGGGGGGCC8GGCG=GGCCCGG1GGGGGGGGGGCGG8GCCGGGGGGGG1CGGGGGCCCGGGGGCGGC +@gi|10001|ref|NC_01.1|-3/1 +CTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGA ++ +CC1GGGGGGGCGGJGJJGJJJGGGJJGJJJJJJJJCGJGJGGJ1JJGCCJGJ=JJ8CGJ8CGGJJGJCJ=CCGGC=GGCGGGGGCGGGGGGGCCG1GGCGJCGGCC(GGCGGG=CGG(GCGG8G1GGGCCGGGGGGGGGGGCCGGGGGGG +@gi|10001|ref|NC_01.1|-1/1 +GAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCCTTGAGGGCGTGTACTTAGCCCA ++ +CCCGGGGGCGGGGGGJJCJJJJJJJJJJ1JJGJGGCJJGJJJJCCCJJGGGJ=GJJJGGJJCCGGGGGGCCC8CCCC=GGGGCGG==GGGGGGGCCC8GGCGGC1GGCGCGGGGGG=GCGGGCCGGC(GGGGGGGGCC8CGGGGGGGGCC +@gi|10002|ref|NC_02.1|-9/1 +AAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTT ++ +CCCGGGGGGGGGCJJJGJGGJJCJJJGGJGJ=JJJJGGGJGJGJGJJJGC=JJCGGGGJGJGGGJ1JGCCCGJGCGC=CGCC==CGGGCC1CGGGGGGCGJCCGGGGGG=CGCCGGGGCGGCCGGGCG1GGGGGCCGCCCCCGGGGGGC8 +@gi|10002|ref|NC_02.1|-7/1 +TGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGA ++ +=CCGCCGGGGGGGGCCJJGGJJJJ8J1JJGGJJ1CJCJJGGG8CJCCGJGJJGGGGGGCJGGGGGGCGGGJGCJGG1GCGCGGGCGGGCGGGGC=(GCG=JCG=GG(GCCCGC=GG(GCCGGGC=1CGCGG=GGCGGCCCGGGGGGCGGC 
+@gi|10002|ref|NC_02.1|-5/1 +GAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAAC ++ +CCCGGCGCGGGGGJJGJJGJJJJGGJG=JJGJGJJJJGJJJJCGJJJJGJGGGJGJGJJ(J=J=JGGG1GCGGCJCJGGCC1GGGCG==GCGG81GGGCGCCCGGCGGGGGCGGGGGGGGCGGGGGCCGG1GGCGCGCGCGGGGCG1GCC +@gi|10002|ref|NC_02.1|-3/1 +CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGA ++ +CCCGGGGGGGGGGCJ1JJJJCJCGJJJJJJJGGGJJJJGJJJJJJGGGGJGGJGGGJG(GJJG=CGJJJCGGJGCGGGCGGGCG8GG=CGGCG8CGGCGCCC=C=GGGCCGGGGC1GC=G=G8GGCGGGCCGGCGGGGGGCGGCCCCGCC +@gi|10002|ref|NC_02.1|-1/1 +GGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCAC ++ +CCCGGGGGGGGGGJJJGJCJCJJJJJJJCCJGJJJGJCJJ=JJGGCGGJGC1JJGGJCJJCGJCCGGGGGGGCJCGCGC=GC=GCGCGG1GGGG=GCGGGCGGCGGGGC8GGCGGGGGGGG=CGGGCGGGG=CCG=GCGCCG=GGGCGGG +@gi|10003|ref|NC_03.1|-9/1 +TCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCG ++ +=C1GGGGGGGGCGJJJJGJJGJJCJJJJGJGGGGJJJGJJGCJJGJJCJJGJGC8JCGJCGCGJGGGGGCGGCGGGGGGG=CGCGGCGCCGGGCGGGCGGCGGGGGCCC=GCGGGG1GGC1C8GCGGGGGGCGGGCC=GCCGGGG=GGGG +@gi|10003|ref|NC_03.1|-7/1 +CTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAA ++ +CCCGCGGGGGGGGJJJJJJJGJJGJJGGJJJJJJGJJJJJGJ8JGJJJGGJG=GJJJJ=GGJCJJGGGCGGGCGJGGGGGGC=GGGGGGGG=G=GGGGGGJG=CG8GGGCGCGGCGCGCGGC18CGGGGGGGGGGCGCCGGGGGCGGGG= +@gi|10003|ref|NC_03.1|-5/1 +GGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAGAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAA ++ 
+CCCGGGGGGGG1GJGJJJJJJGJJCJGGCJGJGJJJJGJJGCJJ8=(JJJGGC8GCJGG(JJJCGGJJGGG=GGGGGGCCGGGGGGGGGCGG=CGGGGGG1CGGGCCCGGGGGC1CCGGGCGGCCCG8GCGGG=CCCGGCGCGGGGGGCC +@gi|10003|ref|NC_03.1|-3/1 +AAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCCAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTT ++ +CCCGGGGGCGGGGGGGCJJJCJJJGJJGJJJJGJJGJC(JJGJJJJJJJGGJGCGGGGJGJGCC=JGG=CCJGGG=GGGG8C=GCGGGGG=GGCGGCC=GJCC1C1C8GGGGGGGCCG=(GCGGGGGGGGCCGGCCGCC1CGGCCGGCCC +@gi|10003|ref|NC_03.1|-1/1 +ATTGCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCAAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAG ++ +CC11GGGGGGGGGJJJJJJJJJJGGJ1CJJJJJJ=JGGGJJGGGJ(JGGJJJGGGGCGGJCGJGJ1GCCGGGCJCCJCGCGGCCGGGGGGGGGGGGGC===GGCGCGGC=GCGCGCGGCC=8CGGGGGGGGGGGCC=GGGGGGCCCGGGG +@gi|10004|ref|NC_04.1|-9/1 +CTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGC ++ +CCC1GG=CGGGGGJJJJJCJGJJJJJJJJJJJJJJJJJJJG=JCJJJGGJJGGGGGGCJJGGJJJGGJGGGJGC=CGJCCG=GCGGGGGCGGGGGGCCGGJCG=G1CG1GGGGCC1CGG=(CGGGGG=CC=GGGGC8GG8GCGGGCGGGG +@gi|10004|ref|NC_04.1|-7/1 +CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC ++ +CCC1GGGGGGGGGJJ1JJJJGJJJGJJJJGGCJJCJGJJJJGJJGJJJGGGGGJGGGJ8=JCCGGCC1GCGGGCGGGGGJG=GC1GGGGGGGGGCGGGGGJCGCGGCCCGCCGGGGC8CGGGCGCGCCGGGGGGGGCGGGGGGCG1CG=C +@gi|10004|ref|NC_04.1|-5/1 +TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATG ++ +=C=GCGGGGGGGGGJJJJJJJJGJJJGJG1JJGCGGJJGJGJ8JJJGGGJ8CJCJGJJGCJGGCGGGCGGJGGGGCG=GGGG1GGG=GGGGGGCGCG8GGJCCCCGGGGGGGCGCGCGCGGGCGGGGGG=CGCGGGGCCGGGCCCGGCG= +@gi|10004|ref|NC_04.1|-3/1 
+GTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAACCCAGGG ++ +CCCGGGGGGGG=GGGCCCGJJJJ=GGGJJJJGCJJJCJGGJJG1JJC8JGJGJCGCGJJJGGJGGGGGGJJJGC1G8=GGCCGCCGCGG8GCGGGCGGGCCCCGGCGGGCGGCCGG1=CGCG=GCGGCCGGGCGGCCG=G=GCGGC=GCC +@gi|10004|ref|NC_04.1|-1/1 +CAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAA ++ +CC8GGGCCG1GGGJJJJJGGJJGJJJJJGG1JJCGGJGGJGG1JJJJGJJGCGJJ=GJJJ=JGJ=GJJJCGCGG8GGGCCGGGGCGGGCGGCGGCGGCGGJCGGCGG1CGCGCGGGCCGGG8GGG1GGC8GGGGCGCGGGGCCGGGGCG= +@gi|10005|ref|NC_05.1|-9/1 +TAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTAC ++ +CCCGGGGGGGGGGJJJJJCGJJJGJJGJJGGJJ8GCJJGCJGJGJJJGJJJGGGGG8JGGJGGGGGGCGGCCJGCGG1GGGGGCGCGGG=GGGGCGCGGGJGGGG8CG=CGGGGCGGGGGGGGGGGCGGGCCG=GG=GGCCGGGCCGCG1 +@gi|10005|ref|NC_05.1|-7/1 +CCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGATTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTG ++ +=CCGGGGGGGGGGJ1JJJJGJ(JJJJJJJJJJGJJJGJGJGJGJGGGCGGGCGJG=JCJGGGCGCGGGGGCJGGGGJGGCCCGG=GGGGGCG(GGC=GGGJGGCGC=GC8GGC=GCGG=G1CG1GGCGGGGGG1GCCGGCGGGGGGCG8C +@gi|10005|ref|NC_05.1|-5/1 +TTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTCTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATT ++ +CCCGCGGGGGGGGJJJJJGGGJJGJJJGGCJJGJJJJJJGG=JGGJJ=J8GJJJCJG1JGCG8GJ1JGGGGJGGCGGCGJCCGCC1GG1GCCCCG(CGGGJCGG(=CGCCGGGCGGCGCC=CGCGGCGGGGCCCGCC=GGCGGGCCCGGC +@gi|10005|ref|NC_05.1|-3/1 +GTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGAGTTAATTTGCTTAGTAGTGAAAGTCC ++ +CCCCGGGGGGCGGJJJJJJJJJ8JCJJJJJGJJJCJCJG=JGJGJGJGJJ=JG=GGG1J=GJJGG8CGGGGGGGGGGGCGCCGGCGGGGGGGGGCGGGCGJG8GCGG=CGGCGCGCGGCGGCCG1GGGG8GGGGGCCGGCGCGGCGGCGG 
+@gi|10005|ref|NC_05.1|-1/1 +AGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGC ++ +CCC=GG=GGGGGGJGJJJJJJJJJJJJJJJJJ=J=JGCGJJJGGGJJCGJJJJCJJJGGGJGGJ(GGGGGJ1CCCGJCGGCCGCCC=CGGGGGCCGGG8=CG=GCGGCGGG1GCC=GCG1GCGGCG=GGCGG=G8GC(GCGGCCGCGGGG +@gi|10006|ref|NC_06.1|-9/1 +GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTA ++ +CCC1GGG1GGGGCJJJJ=JJJGJJJGJJGGJGG=GJJJJJGJJJJJGJ8JJJCJG1=GJGGGGJGJG8GGGJCGGCCGGCGGCGGGGGGG(GCGGGGCG==CGCGGG8CCGGGGGGGG=GG=GGGGCCGGGGCG=GGGGGGCGGGGCCCC +@gi|10006|ref|NC_06.1|-7/1 +GCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTA ++ +CCC1GGGGCGGGCJGJJGGJJJJJJGJJGCJJ=JJJCJJC=GJCJCGGGJJJ=GGJGCJGCGCCJJCJG8=GCGJGJG=CCG=CGGCGGC=GGC1G=CGGCCGGGGCG8CGC1GCGCGCGGGGCGGGGGCGGGCC=GGGGGCCGGGGGG= +@gi|10006|ref|NC_06.1|-5/1 +GTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATTTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAA ++ +CCCGGGGGGGGGGJGJJJGJJJJJGJJJCGJJJJGJJ8GJGJJJGJGCJJCGCJ(GGGGJGCCCGGCCGGJGGGGGG8JCGCGGGGGG=GCGGGGGGCGCJCG=CCGGCG=CGG8GCGGC=G8GCGGGCC=GGGGCCGGGG=GGGGG==C +@gi|10006|ref|NC_06.1|-3/1 +CAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTT ++ +CCCGGGGGGGGGGGJJGJGJJJJGGJGJJJCCJJJGGJGJGJGJGJJGJJJ=GJGJGGCGCCGGJGJC=GGGCGCGGG(GGGG=GGGCCGGGC=C1G=CGJGGGG=CGGCCG=GGGGGGGGC8GGCGGCGCGGCCGCCGCGGGGCGCGGG +@gi|10006|ref|NC_06.1|-1/1 +ATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAG ++ 
+CCCCGGCCGGGGG1GJ=JJJGJJJJJJGJCGJGJGJGGJJJ1JJ8CC=JJGJ1=GGJJJGJJGCJGJCJGGGGG8GGCGGGCCGCGGCGCGG8GGGCGCC1=GGGGCGG8GGCCCGCGGGGGCGGGGC=8C=CGGGG8GG=G=CGGGGCG +@gi|10007|ref|NC_07.1|-9/1 +CTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGC ++ +CCCGCCGGGGGGCGGJJGGJJGJGGJJGGCJGJ(JJC=G8JJ1JJGGGJJGCJJGCJGGJG==J=JGC8GCGGCGGGGGCGCCC=GGGCGGGGGG(G=CGJCCCGGGCGGCCGGGGCCGG1C=GGGGGGC1GGGGCCGC=CGC8CGGGCG +@gi|10007|ref|NC_07.1|-7/1 +GCAGTAGCAGACAAGTTTGAATTGGGCGAAACCTACTTGCTTCCTCTTGGAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAAT ++ +CCCG=GGGGGGGGJJJJJJJGJ=J1CJGGJJJ=JJCJJJJGJJJGCJJJGGJ8GJ8GCGJJJGGGJJGGGC=CGGCGG=G8GG=GC=GCGG=GGGGGGCCCCCCCGGCGGGGC=GCCGGGC=GGGGGC=GGCG1GCGCCGGG1=GGCCCG +@gi|10007|ref|NC_07.1|-5/1 +TGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCC ++ +CC1GCGGGGGCGGCJJJGJJJJJJJJJJJJJJJJJ=JJGJGGG(GCJJGJJGGGJGGGJCJGGJGGJCCCGGGCGGCCGGGGGCGCGCGGGG8CGGGCGGJGCGGGGCGGGGGGCGGGCGCGGCGCGGGGGGGGGGCGGC8G=GCCGGCG +@gi|10007|ref|NC_07.1|-3/1 +ACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAATATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAA ++ +=CCGCGGGGGCGGJJJGJJJGGJGJ1GGGGJGJCCJJ8JGJJJJJGGJG1GGCJJJJJJ=GGG1GGJGGGGCGGGCGCGGCCCGGGCGGCGCGCCGG=G8JGCCCCCGGCGGC=CCGGCGGG8GCC=GC=GGG=C=CGGGGGGCGGGGCC +@gi|10007|ref|NC_07.1|-1/1 +AACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGCTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCT ++ +CCCGGGGGCGG=GJGJJJGJJGGGJGJJGCJJJJGGGGCGJJGGJJJGJ8JGGCJJGCGJGJGJCGCGGGJGGGGGJGGGGG(GGGGGG1=CGC==GCGGJCGGGG=G8GGGGGCGCGCCGCGGGGGCGGGG8GG1C1GC1GGG18GCGG +@gi|10008|ref|NC_08.1|-9/1 
+CCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCC ++ +CC=GGGGGGGGGGJJJJJJJJGJCJJJJCJJJCJJJJGJ=JJJG8JJJGGJCGCGGJCJJ8GJCCG=GGGGG=GG=CGGGGGGCGGGG8GG=GCCCGCGGJC1G8GGCCGGGGCGCCGGC1=CGCGGGGGCGCCGC8GGCCC8GG8GGGG +@gi|10008|ref|NC_08.1|-7/1 +AAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGG ++ +C=CGGCGCGGCGGJJGJJ1J1JGJJGJJJJJJJJJGJCGJJGJJGJJCGJJGJ=JGCGJG(G=GGJJG=CCGG=GGGCJ=CGGCGCG=GCGGGGCGGCGGJGG18GGGCGGGGCGGCCGGGGCGCGG=GGGGGGGGGGGGGCGCGCGGGG +@gi|10008|ref|NC_08.1|-5/1 +TGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTA ++ +=CCGGCGGGGGGGCG(JCGGCJJJJJJJJJG1GCJCGJJJJJJG8GGJGJJGJJJG=GGJGGJGGGGJGCGCCGGCGJG8GGGC=GGCGGCCCCGGGGGGJCGGGCGGCGGGG8C8G=CGGGGGGGGGGG=GGG=G=GCCGGCCGGGCGG +@gi|10008|ref|NC_08.1|-3/1 +TTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATG ++ +CC=8CGGGGGGGGJJGJGJJJJGJJJGGJJGGJGGJJ=JJGJ=JJGGGJ8CGJG=GJCJGJJGGCJ=JJGGCGGCGCGGGGGGCGGGGGGCGGGGGG1GCJCGC(GGCCC8GGGCGG=GGGGGCGGGGGGCGGGGGGC=(CCGCGGCC=G +@gi|10008|ref|NC_08.1|-1/1 +AGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCT ++ +CC=GGGGGGGGGGJGJGJCGJJGJ=JJGJJJ=CJJCJGGJJ=GJJGGJJGGGCGJCCGGJG8JGGGGGJJGG8JGGCGGCGGGGGGGG=CCGC=18GGGGJCCGCGGGGCGCCGGGGGG=GCG1CG8=CGGGGGGGCGG==CCCG8GGGG +@gi|10009|ref|NC_09.1|-9/1 +CCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATG ++ +CCCGGGGGGGGGGJJJJJJJJGJJCCJJJJJJJJ=GJJJG(GJJ8GGGJG8JGJCJJJCGJCCG8CGGGC=GGGGGJ18GG=G=GGGGGGCGGGG1GGGCJGGCGGGGGGCGGGGGGGC=GGGGGCGCGGGCCGGGGCCCGGCGCCGGGG 
+@gi|10009|ref|NC_09.1|-7/1 +CTGCGAACAGCCCCAGCCCCCTTGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTT ++ +CC=GGGGGGGGGGJJGJJJ8JJJGGJJGJJJGJGJJGGGJJJJJJJJJJJJJCJJGGJGGGGJCGJG1=JGGGGCGG=GCG=GGGGCGGG1GGG88GG=GJGGCCG(GGGGGGGGCGGGCGGCCGCCGGCCCCGGGGG=G=GGGGCGC=C +@gi|10009|ref|NC_09.1|-5/1 +TTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTCCTGCTACTGCCGAAGTCACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGATATAGACA ++ +CCCGGGGGGGGGCJJJJGJJ8JJJJJJJGJJ8JGJJJCGGJJJJGJGJJGGJJ=GGGJG(GJ8JGJG8JJGGGGGGG8GGCCGGCGCGGGG=GG(GGGGGJCCCGG=GGGCGGGGGG8GCGGC=GCGCGGGGGGGGGG8CGC=CCGCGGG +@gi|10009|ref|NC_09.1|-3/1 +GGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGC ++ +CC1C=GGGGGGGGCJJGJGJGJJGGGJJJJJJGJJGGJJCCGJ8GJJJJJCJGGCJJGGGCGGGGGGGJGG1GG8CCGGGGGGCG1G=GGGGGCCCGGGGCCCGGGC(C1G1GGCCGCCGG=CGCGGGGGCGGGGGCGGGGGGGGG=CGC +@gi|10009|ref|NC_09.1|-1/1 +TGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCATAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTGCTGCTACTGCCGAAGT ++ +CCCGGGGGGGGGGJ=GJGJGJJJJJJ8GJ1=GGJJGJ8JJJJJ1JJGGJJGGGGGGJGGGGGJGGGJJGGGCGCCCGGGG888GGGGGGGGCGG=CGGCCJGCGCGGC=GG=GGGGCCC=G=GGGGGGGCGCCCGGGGGGGCGG=GCCGC +@gi|10010|ref|NC_10.1|-9/1 +CTTGAAAAGTTGTAACCAAACGTACGAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAATATGACTTA ++ +CCCGGGGGGGGG8JJGJJGJJJCJJ(CGGJJJGJ8JJGCGGJGGG=GJJJJGGJJJJJCJGGGGCGC(GCGG8GGGG=CGGGG8GGGCC==GGGCGGGGCJ=GGGGCCGGGCCGCG=C8GCCGGGCGCGGGCGC=GGCGGGGGGG=G1CG +@gi|10010|ref|NC_10.1|-7/1 +ATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCC ++ 
+CCCGGGGCCGGGGJGJJGGJGJJJ1=CJGJJJJJGCG8GGJJJJJJJJCJGGJJCJJGGJGCGGGJGJJGCG=GGJGCGGCGG=GCGCGGC8GGGCGGCGJC8G=GGGGCGCCGCGGCGGG=1=GGCCCGGGGGGGCGGCG1CGGGC=GC +@gi|10010|ref|NC_10.1|-5/1 +CCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAA ++ +CCC1GGGGGGGGGGJGJGJG=GJGJJJJG=JJJJJC8JJJJJJGGGGJJC1JGGJJJGGGGGCCJJGJGCGGCCGGGJCCCCGGGGCGCGGGCGC=GGGGJCCGGGGGGGGGGGGGGG=GGGGGGGGGGGGCCG=CGGGG=GCGCGGGGC +@gi|10010|ref|NC_10.1|-3/1 +CAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATT ++ +=CCGGGGG1G=GGJJGGJJJCGJJJJGJJCCCGJJGJGJJGJ=JJJJJGJJGJGJG=C=8CC=GGGGCGGGGGGG=GCGGGGCG=C=CC=CGCCGGGGGGJCCCCGCGGCGGG8==CCCGCGGGGGCG1GGGGCCGGGGGGGCGGCGGGG +@gi|10010|ref|NC_10.1|-1/1 +TAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGG ++ +CC=CGC=GGGGGGGGJJJJCGJJCJGJJCJJJJJCGJCCJ=JCGJGJGJGGC=GCJGGJGGCGJGG=GGC8GGC8GGGGC=GCCGGGG=G=GGGGGCGG=J(GGCGGGGGGGGG8CCGGGGGGCCCGGGGGGGCGGCGG=GCGC=8CGGG +@gi|10011|ref|NC_11.1|-9/1 +AATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTC ++ +CC=GGGGCGGCGGJ1JJGJGJJJJGJJG1JGJGJJJGGJ8JJJCGGGCJCGGJ=CCJGCGCGGGJC=CGJCCGGCCCGGGGGCGGGGG8GCCGGGGGGGCJGCCGGGG=GGGGGGGC=GC1GGGGGGCCGGGCGGGCGGCG88CGGG=GC +@gi|10011|ref|NC_11.1|-7/1 +AAGTCAAATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCCAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGA ++ +CCC1GGGGGGGGGJJ1JCGJJCJJJJGJJJJGGGGGJJJJGCG=JG=JGGJJGGGCCJJGGGJGGCGGCGGCGGGCJGCCGGCCCGGGGC=GG8CGCGGGJCGCC1GCGGGCGGGGCG=G=CGGCGCGGCGCGGCGGGGGCCCGGGGCCG +@gi|10011|ref|NC_11.1|-5/1 
+CAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG ++ +CCCCCCGCGGGGGJJJJJJJJJJJJJGJJJ=JJGJCGGGJJGJGJJJJGGGJCJGGCGG1=GG8J=CCCGGJGGGG1CCCGGCGCGCGCGGCCGC=GGCCJG=GGGCGGGGG=GCCGGGGCGGGG=GGGGGCGGGCCGG=GCGG=GGGGC +@gi|10011|ref|NC_11.1|-3/1 +GTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGT ++ +CCCGGGGGGGG=GJCJJJGJJJJCJJJJJJJJGGJGJGJGJGGJJGGJJGGJJJJGGJGJCGGGGCGGGGG8GGGC=GGGGCGGGG8=CGGGGGGGGG=G1CGGGGGGGGC1GGGGCGGG8GG8=GGGCGGCGGGGCGCGGGGGGCCGGG +@gi|10011|ref|NC_11.1|-1/1 +GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAACGGTCACTCTTGTGTCTATCTCGCATTCA ++ +CCCGGGGGGGGGGGJGJJJJJGGJG8JJGJJJJJGJ8JJJGJJJJGJJCGGCJGGJGGGJGGGGCGGGGCCGJGCGGJG8G=CCGGCC=GGGCC=GGGGCJGGGGGGGGGGCGGG1GGCGC(GGCGGGGG=CGG=CGGGGCC8GGGGCCG +@gi|10012|ref|NC_12.1|-9/1 +CGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTA ++ +CC=GCGG=GGGGGJGJJGJJJJJ8G=JJJGJJJ=JJJJJGJGJJ=JJGGJJJJJJCJGJGJJ=CJGGGGCGCGGGGGGGGCGGCG=GGGCGGCGGCGCGGJC=GGGCCCCGGGCGGC8CGGGGGG=GCGCCGGGGGCCGGCG=CGGG8GC +@gi|10012|ref|NC_12.1|-7/1 +CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC ++ +CC8GGGGGGGGGGJJJJJJJ1GJJJJJCJJ1GGCJJJJCGJJCJJJGGJJGGCGGCCGCGCCJGGGCGGG=GCCGG=GCGGGCGGGGCGCG=GGGGGGGGJC=CGG1GCCGG1GGGCG1CGGCCCGGCCGGC1GCGGCGGGGGGGCGG8G +@gi|10012|ref|NC_12.1|-5/1 +ATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTA ++ +C=1=1GGGGGGGCGJJCJJGGGJJJJCJJJJJJJJJJJJJCJJCGJ(GGJGJJJJJJ8GGGCCCC8GGGCGGCGGCGGGGCGCCGC8GCGCGGG(GGGGGJGC8G=GGGGG=GGCGCCGGGGCGCCCG=GGGG1GGCCGGCCGCCC8CGC 
+@gi|10012|ref|NC_12.1|-3/1 +GTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAA ++ +CCCGCGGGGGGGGJJJGCGJJJJJJJ=JJJCJJJJJGJJJGJCJJCGJGJGGGGGGJGJJ=CGJGCGJ1GGG8J=G8GCCCGGGGCCGGCG(CGGGGGGGJCGGG=CGGGGGCGCCGGCGGGGGCGGG=GGGCGGGGGCCGCGGGC=GGC +@gi|10012|ref|NC_12.1|-1/1 +ATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGT ++ +CCCGGG=GGGGGGCJJJGJJCGJJJJJJJJJGJJGGJGCGGJGJ8GGGJGGJGJGJGGGJCJ=GGCGGGJJGGCGGGGCCCG1GCGGGG1CGGGGGGCCCCCGGGGG=CCGGGGGGGGGGGGGGC=CGGGGGGGGC=GCCGGGCCGCGCG +@gi|10013|ref|NC_13.1|-9/1 +CGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAG ++ +CCC8CGGGGGGGGJJGCGGJJGJJGGJJJJG(J=CJJJJJGGGGGCJGGJJJ=GGCGJCCGGCGGCCJCGGGCGJGGGGCCGGGGGGCCGGGCGCGGCG(==CCGGGGGGGC=GGGCCCG8GGCCGGGGGGGGGCCGGCGCCCC=GCC=C +@gi|10013|ref|NC_13.1|-7/1 +GACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC ++ +CCCGGGGGGGGGGGJJJJ1GJ8JJGJJJJJJG=CGGJJJCJJGGCJ=JJ=CC=J8GGJC8GGGGGGGCGCGJCGCCGC=CCCCCGGGC=GGGGGGGCCGGJGCGGG=CGCG=CGG=CCGCGGGGGGGGGGCCGGCCCCGCC=CGGGGCG1 +@gi|10013|ref|NC_13.1|-5/1 +AAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACAT ++ +C1CCGGGGGGGGGGJ(GJJJG=GC8JCJJJJGJGJGJJGJGJGGCCGGGGJJG=JGJJJJGJ(G8GGJC(GGJGG=JCGGGCCGCGC=GCGGCGGCCGCGJGGCGGGGCGGG8CGC=CCGGCCGGCGCGGGGGGC=CGGGGGCGG=GGCC +@gi|10013|ref|NC_13.1|-3/1 +GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTC ++ 
+=CCGGGGGGGGG=GCJJJJ1JJGGJCGJGJJJGJJJJJGGGGJGJCJGJGGGGJJJJJG=JJGJJCGG18GCCGGGGG=GGGCCCGG1GC=G8GGGGCCGJ=1GGGCG8GGGGGCCGGCGCCCCGG=G8CCGGCGCGC=GGGGGCGG8GG +@gi|10013|ref|NC_13.1|-1/1 +CAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTT ++ +=C=GGGGGGCG=GJJJGJ8JJGJJJJCGJJJJJJJGGJJGJGGJJJJCGJJJJGCJJCJGJGCGGG=GCGJGCCGCGCC=GGGGGGGC=CGGGGGGGGCGJCCGC=GCC8CCGG1CGGGGGCGG=C=GGCGCGGGGGGGCGCGG=GGGGC +@gi|10014|ref|NC_14.1|-9/1 +ATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTA ++ +CCCGG=GGCGGGGJJJJG8JJJJCJ8JJGJJGJCGJCGGGJ8CGJJJJJJJGGGG=GCGGJ=GGCJCCCGGCGGGGCCC1C=CGCGGGG1GG=GGCGGGGJGGGCGGGGGGGGGC=GGGCGGGGCGGGGCGCCGCGGGGGCCGG=G=GGG +@gi|10014|ref|NC_14.1|-7/1 +TACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGCCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA ++ +CCCGGGGGGGGGGJJJ=GJGGGGJGJGJGJGJJGGGJ1JGGJJCJJJJJGJ=JCGJJGGJCJGJ1G=GGJCJGJ=GGGGGGGGCG(CCGGCGGGGGGGGCJC=GGCCCCGGGCCGGC==GGCGCGGGGCCGGGGGCGGGGGCGGGGGGGG +@gi|10014|ref|NC_14.1|-5/1 +TCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGC ++ +CCCGGGGGGGGGGJGJJJCJJJJGJJGCCJGGJJJJJGJGJC=JJGGJCGGJC8JGJJG=GGGCGJJJCGCGGCGGGCGGGGCGGCCG=GGGGCGGCGGCJ==GCGCGGGGGGGCGGGG=GGGGGGGGGGC=GCGCGG1GGGGGGGGGCC +@gi|10014|ref|NC_14.1|-3/1 +ACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAATTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATG ++ +CCC=GCGGGGGG8JJJJJJJGJJJCJJGJJ1GJJGJJJJJJJCJJCJJJCC=(JJ=JGGJGGCGJGCGGGGCGG8=GGGGGGGGGCGGGCGGGG=GCCCGJC=C8CCGGG8GGG=GGGGGG=1GG==CCC==GCCGCGGGGGGCGGGGCC +@gi|10014|ref|NC_14.1|-1/1 
+GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACT ++ +C1CGGGGGGGGGGJJGJ=JJG=JJGJJJGJJ1JJGJJCJGJGJJJGJGG=GGCJGGJCCJJJ(GGG=GGGGGGCGGGGGGGCGGGC=GCCGCCGGGGGCCJGGC=GGGGGCGGCCGCCCGGGGGCGGGC1CGGGCGC=GCGGGGG8C=CC +@gi|10015|ref|NC_15.1|-9/1 +GTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAG ++ +CCCGGGGGGGGGGGCJGCJJJJJJGCJGJGGJGGJJJCJCJGGGGGJGGJJJGJJJJGJGJCGCGCCJGGCGGG==G=GGGGGCCGG=CGGCGGCCGGCCCC=GGCGCCGCGG=C=G=CGGG=GCGGCGGGGCGGGGGG8CCGCC=CGCC +@gi|10015|ref|NC_15.1|-7/1 +ATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCATTACTT ++ +CCCGGGGGCGGGGCJJJCJCCGJJGJJJJGGGGGGJJJJJJGGGGJGGJJGJCGGGGGJ1GGGJGGGJGCG(GGC8CGCGGGGGGGCCCCCGGGGGCGGGJCGGGGGCGGGGCGGCCGGGGGCCCCCGGCCGGGGGGGGGCGGGCGGGCC +@gi|10015|ref|NC_15.1|-5/1 +GACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC ++ +CCCGGGGGGGGGGGGJGJJGJJJJJGCJGJJJGJCJJJJGGGJJGJJJCGJ8JJJGJJJ(GJGGGGGCGCGJGGGGGGGGG=GGGGCGGCGGGCCC=GGGCGGGGCGCGG=GGGGGGGCGG1GGCGCCGG=GCGGGGGCGGCCGCGCGGG +@gi|10015|ref|NC_15.1|-3/1 +GATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTT ++ +8CCGGGGGGCGCGJJGJJJJJGJCJGGJJJGJGGGJJJ1JJJJGGGJCCG8JJGJCJCGGJJJCCC=G8=GJJGCCGGGGCGG1GGCGGGGGGGGGGGGG=CG8GCGG=GCGGCGCGGGGGGGCGGCGGGCGCCGGGGCGGGGCGCGGGC +@gi|10015|ref|NC_15.1|-1/1 +TAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGAAATTTCATGGGT ++ +CCCG=GGGGGCCGCJJJCGJCJJJJJJGGJJGGJJGJJJGGJGGJJCJJGCJGGGG8=JGJGGJG=CGGGJGGCCCGGGGGGGCGGGGGGGGGCGGCGGGJCGGGCCGGGCGGGGCGGGGGGCGGGG8GCGCGGCG8C1GCCC1G(CCG8 
+@gi|10016|ref|NC_16.1|-9/1 +TGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACC ++ +=CCCCGGGCGGGGJGJCGJJJJJJJJJJJJJJGGJJJJGJJJJGGJJGCJCJJJJJCJ=JGGG1GGGCJGGJGGCGCCCGGCGG=CGCCG=G(GGGGCGGJGGCCC8GCCCCGGGGGCG=GGGGGGGG=GGGCCCGGCCGGCG=GGGG=C +@gi|10016|ref|NC_16.1|-7/1 +CTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCT ++ +CCCG8GGGGGGGGJJJJJJJGGGJ8JGJGJJGJJJJJGJJJGGJJJCGGJGJGG8G=GGGCCGJGCGCGCGCGJGCGGGCGCGCGGGGC8CGCGCGGG=GJGGCGGGGGGGGGGGGC8GG=GGCGG=GGC1CGGGGGGCCGCGGGCCGCG +@gi|10016|ref|NC_16.1|-5/1 +CCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCA ++ +CCC1GGGCGGCGGGGJJGJJJJJJJJJCJJCGJJGJJJJGJ(JJCJGJJCJG(G=JJJJGGJJ=GCJGCJGCJGC8=GG=GGGGCGGCGCGCCGCG=CGCCCG1GG=GG1GCGGCCCGGGGCG==GG=CGGCGGCGGCCCC=GCGGCGCG +@gi|10016|ref|NC_16.1|-3/1 +GCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCC ++ +CCCGGG=GGGGGGJJGJJJJGJ1JJJJGJ=GJJJCGJGGJJJJJGCCJJGJJCJJGGJJ=JJGGJJGGJCGJJCGCCJCGG=GGGG=GCGG=GGGGCGCCJG=GGG1CGCCGC1GCGGGGGG8GGGGGGGCCGCGGCGGC=GGCGGGGGC +@gi|10016|ref|NC_16.1|-1/1 +CAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG ++ +CCCGGGGGGGGCGJCJJCGGJJJJJJJJCJJ1JJJGJJJGJJGJGGJ=GJGJJJGGGCJGJGGGJGGG1G1GCCCGGGG=GG=CGGGCGCGGGCGGGGGGJCGCGCGGG=GGGGGGC(GGCCGGCCGGGCCGGCGGGCGCCGGCCCGCCC +@gi|10017|ref|NC_17.1|-9/1 +CTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAA ++ 
+CCCGGGGGGGGGGCCJJGJJJGJJGGJJGCJJGGGJJGGJJGJGGJCG==JJGGGGGCCJGJGGCJCJCGGGGGCCGCGGGG=GGGCGGGGGC8GGGGG=C=GCG=G1G1GGGGGG=G=GGGGGGCCGGGGCGCCGGGGGGGGGGCGGC= +@gi|10017|ref|NC_17.1|-7/1 +CCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGA ++ +CCC=GGGG(GG1G1JJ8GJJGJJJGJGGGJGGJGGJJCJJJJGJJJGJ1GGG=1CGJGGCCJGJJGGGGGGGGCCCCGCCGGGGGGCG=GGCGGGGGGG=CCCGGGGCCCCGGGGCGGGGG=G=GGGCGCGGGGCGGCGGCCG1CCCGCG +@gi|10017|ref|NC_17.1|-5/1 +AGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATGGGATGCGAGATAGAAACCAGAGTCACCCTTTA ++ +CCCCGGGGGGGGG=JJJGJJGCJJJJGJCJJJCJJ(JGGJJCCJCJGJJ(G=CCJGG(GGJJCJGJJGCGGGG1CGGGCGGGGGGGC(GGGGGGG8GCGGJCC=CGG=GGG=CGGGCCGGGCGCGGCGCGCGGGGCGGCGCGGGGGCGGG +@gi|10017|ref|NC_17.1|-3/1 +GATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACA ++ +CCCGGGGGGCGGGGCJGGGJGGJCJJGJ8JJJGJGGJJJGJJGGCGGGGG=JJGJGGCCGG1CJGCCG(GGGGGCGCGCGGGGGGC1GGCG=CG=GCC=CJCGGGCGGGGCGCGCC8CGGCCGGCGCGGCGGGGGGGG8CGCG=CGCCCG +@gi|10017|ref|NC_17.1|-1/1 +AAATGTCTACATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGA ++ +CC=GGGGGCGGGGGJJGGJJGJG1JJJJJJJGGJ=JGGJGCGGJGJCJGJGGGGGCGJJJGJ=GG=GJGCCGGG=GGCGGGGCCCGCGGCG8=GGGGCG1JGGGGGG8GCGG=GGGGCGCGCCGGGCGCGGGGGGCGCCCGCGG=GGG=G +@gi|10018|ref|NC_18.1|-9/1 +ATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGC ++ +C=CGGGGGGGGGGJGJJJJJJJJJ=JGJGJ8=JJJ=JGJGJCCGJGJJJJGGGC1GJGJCJJCGGGGCG=GCCGGGCGGJCCGGCGGCGGGGGGGCCCGGC1CCGG=GCGGGGCG=GGCGCGCGCGGC=GCCGGGCCG8GGGGCCGC8GG +@gi|10018|ref|NC_18.1|-7/1 
+CGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTGGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGG ++ +CCCCGGGGGGGGGJJJJJGJGCJJJJJJJJGJGJJJCJGJGJCJJJJG==GGCGCGGJ8CGGJGGJGGGGJJG=G1GGCGGGGGCCGGGGGCGGGCGGGGCGCGGCGG=CCGG=GGCGC81GCGGCCCGGG=GGGGCGGCG1CG=CCCGG +@gi|10018|ref|NC_18.1|-5/1 +TCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC ++ +CCCGGGGGGGGGGJJGJJCGJJJJJJJGJJJG8GJJJGJJJJGCGJGJJGJJJJJGGGGJC=CCGJG1CGGG=GGCGC8CG1CGGGGGCGCGGGGGGCGGJC=GCG=81CCGCG=GCGGGCGGG=GGGGGGC===GGCGGCGGGCGGGGG +@gi|10018|ref|NC_18.1|-3/1 +ATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTA ++ +CC=GGGGGG8CGGJJJJ1JJJJJGGJJJG8J=JJJJCGJJGJJGGGJJGJJ=CGGGJJJJJGJG=GGCJCGCJCCCG=CGG=GGGGGCGCCGGCCGGGGGJCGGGCGGCGC==G8GGGGGGCG=GGGCGG=C8GGG=GCGGGGGCGCCGC +@gi|10018|ref|NC_18.1|-1/1 +GCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCC ++ +C1CGGGGGGGG1G1JJJ=GJJJGGJG(GJJJJJGJGJGJGGCGJCJJJJJGGGJJJJJCGGJ=CJGGG(8GGC8CCGGGGGG=8CGGCG8GGGGCGGGGGJCGGGC=CG8GGGGGGGGCCGG=CGCCCGGGGCGGGGGCGGGGCGC(GG= +@gi|10019|ref|NC_19.1|-9/1 +CAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATA ++ +CCCGGGGGGGGGCJGGGJGJJCJJJJJJJJJJJJCGJJJJGJGGGGGGGJ1CJGJGGGGJC=G8GGGCGC=GGCGJGCCGGC8GGGCCGGGGGGGGCGCGCCCGGCGCGCCGGCCGCCGGGGCCCG=GGCCGGGGCGGGGGG=G1CGCGC +@gi|10019|ref|NC_19.1|-7/1 +CAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATT ++ +C=CGGGGGGGGGGCJJJJJJGJJGJJJGJJJJJ(JJJGGJJGJGGJJGJG8GJJG=JGGGCGG8GJJCC=G8GGGJCCG=GGCCCGCG=GGCCGCGG=8GCGGGGCGGGGGCGGCGGGGGCGGGG=CGCCCCCCGGGGC1GGCGGGGCC= 
+@gi|10019|ref|NC_19.1|-5/1 +CGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGA ++ +CCCGGGCGGGGGGJJJJGJJJJGGJJJ1JJG=JCJJCJGJGGGCJJ8GGJGGGJJ=GG=JCGJJGGGGGGGGCGJ=GGCGGGGGGC8GG=GCGG=GCGGGJGCGGGGCGCGCGG=GGCGGCCGGGGGGGGG8G=GCGGCGGCGGG8CCCG +@gi|10019|ref|NC_19.1|-3/1 +CAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGAACGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCG ++ +CCCGGGGGGC1GGGGJGJJG=JJGJGJJJGCJJJJJJCJJGJGGJJJ=CGGCCJGGCJGJCGGCJGGJG=GGGGGGCGGG1GGCGGGGG(GGCC=CGGC=JCGCCGGGCCGCGCGG1CCGCGGGCGGGGGG8CGCGC8GCCGG88GG==G +@gi|10019|ref|NC_19.1|-1/1 +TTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTG ++ +CCC=CGGGGGGGGJGJJJG8CJGGJJGGJJGJJ=JJGGGJJJJJCJGJGJJGGGJCGJGGGGC=GJJJCCGGG=GJ==GGGG(=CGCGGGGGCGGGCGGGJ=CGCGGC(GGGGGGGGGGGGGGGCGGGGGCGGGCGGGCGGC=GGGGGGG +@gi|10020|ref|NC_20.1|-9/1 +TATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATAATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACT ++ +CCCGCGGGGGGGG=JJJJGJGJJ=JGJCJJJJGGJGJCJJJ8GG=GJJCJGGCJGCCGGGCCGGGJG1JGGGG=GGCC=GCGGGG=GGGGCG1GGGCGGCJCGCCGGGGG1GCGCGGGCCGCGGGGC=GGCGG8C=GGGCGGCGGGGCCG +@gi|10020|ref|NC_20.1|-7/1 +ACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCA ++ +CCC=GGGGGGGGGJJJJJCGGGGCJJGJJGGJJJJJJJJJJGJJCJGJGGGGJGGGCJGGJGGGGGGCGC=JGGGGCCGGGGGGCGCCCGGCCGGGGC8CJ=CGGGGGC1GCGCGGG===G=C=G=GGGGGG=GGGGGGCGCCCGCGCC= +@gi|10020|ref|NC_20.1|-5/1 +GGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAA ++ 
+CCCGGGGGCGGGGJJGJJJJJJGJG=JGJJJJJGJGGJJGJCJCJGGGJJJGJC=JCJ1GGJGJ8=CCGCG=CCCGGGJGCGG==GGCGGGGGCGGC8GGCC1CGCCCG=G=8CC=G1CGGCCGGCGGCGGGGGGGGGGG1CG=CGCCGC +@gi|10020|ref|NC_20.1|-3/1 +ACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAG ++ +CCCGGGGGGG1CGCJJJGJJJJGJJJGGJJJGGCJ8GGJGCJGJ1JJJ=GGJJJJGCCGJJGCGGCGGGC1GCCCCCGGCCG=GCG=GGGCGGGCCGG8GJ8GGCGGGGGGGCGGGGG1GCCC8GGCCGCCGGGCGGCC=GGGCGCGCGG +@gi|10020|ref|NC_20.1|-1/1 +TACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA ++ +CCCGGGGGGGGGGJJJJJJJGJJJ=GJJ8J=JJJGJGGJJGGJGJJCJGCGJJGCJGGGGCGJJ=CGCCCJGGGGCGGG1GGGGGGGCG=GGGGGGG=GGCCGGCCGGCGGCCG=GGG8GGGGCCGGG1GGC=GGCGGG8GGCGCCGGCG +@gi|10001|ref|NC_01.1|-9/2 +CCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATACAGA ++ +=C=GGG=GGGGGGJJJJJJGJJJJGJJJJJJJ==JJ1JJJGJJGGCCJ=JCGGJGGG(JGGJCGJGG(G1GJGCCCCCCGCGGGCG=GG81=GCGG8=GG=CCJJJCC(GGC=GGC1GGG81G1CCGCCGGGGCGCGGGGCC=GG(GGGG +@gi|10001|ref|NC_01.1|-7/2 +GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTC ++ +CCCCCGGGGGGG(JGJCJJJJJJC(1JJGGGJCJJGJJJ1GGGGJJCJGJJGGGGGGGJGJCCJG==GG8GCCGG=CG8GGGCG8GCCCCGCGC=CCCG=C=CJJJJCGGC=GGGGCCGGGGGC=GCC=1CCCGGCGGGCCGCGCCCGGC +@gi|10001|ref|NC_01.1|-5/2 +AACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCG ++ +C8CGGGGGGGCCGG1JCJJJGGGCCJJJJGJJG=J1GGGJJJJJJJ1JJJGCCJCGGCGGGGJJGCJGGCGGGGGGGGGCGGG=GGCCG=GCGCCGGGG8C1C=JCJ1CCGCCGCGGGGGGCG=GGG=GGCCGGC8G1=CCGCGCG=8CG +@gi|10001|ref|NC_01.1|-3/2 
+ACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCA ++ +=CCCCGGGGGGGGJGJJ8GJJ1JJJCJJJCJCGJGJGJGJJJJJGJC=JGGGGCGJG=JCCGGGCGJGGG8G8G=GGCGCGGGGCGGGGGGGC=CGGCG=GGJJCJ8GGG=CG1CGGG=1CGGGGG1G=GGGGCCGGCGGGG8GGC=C8C +@gi|10001|ref|NC_01.1|-1/2 +GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTA ++ +CC=GGGCGGGCGGJJGJCJJGGJ1JCJJJJJGJJJJJCJGJJJ8JJGGJJ=C=GCGGGCGG(J(JJGGJCJG=GGGGJCCGCCGGGGGCGGGGGGGG8CGC=CC(JCGGGGGGCGGGCCGGCGGGGGG8GCCGGGGG8CGG=CGCGC=1C +@gi|10002|ref|NC_02.1|-9/2 +ACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGA ++ +CCCGGGGGGGGCG=JCJJGJJJJJ=JJJ1J1JJG8GJGJJGGJGGGGJJJGGGC8GJGGJCGCJGCJGGJ(GCGCG=GC1CGGGGGCG1GCCGGGGGGCCG==CJCJGGCGCGCGGGC=GGC=GG=CG8CG1G(CCC1GCCGGG=CGGGC +@gi|10002|ref|NC_02.1|-7/2 +GAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAAT ++ +CCCGGGGGGGGCC1JGJJJ=JJJGJGJCJGJJJ1JJJGJJGGJGGGG==JCJJJ8GJJJJCGGGGJGGCCGCGGGGJCG1=GGGGGGGGCGGGGG=GCGGCCCJJJJG1CGCCG=GGGGC==CGGGGGGGGCCG=CGGCG8=GCGGCCGG +@gi|10002|ref|NC_02.1|-5/2 +TGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAA ++ +CC1CGGGGGCGGGJGGJJJJCJJJGJJGJ=J=JJCJGJGGJGCJJG1J8GJC=GGC=GGGG(G1GJGJGJ=G=CGJGGGG8GG=GCCGGGGCGCCCC1CGGCJJJ1J1GGGGC=GGCGG8GCCCGGGGG1GGCCGGCGGGCGCGGGGGGC +@gi|10002|ref|NC_02.1|-3/2 +TTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAG ++ +CCCGGGGGGGGGG=J==JJJJ1GGCJJGJJJJGGGJGJGJCGJGGJGGCGGJGGJGCGGJCCGGGGJGCCCGGCGJGCG=GGGGCGCGGGCC=GGGCGCGGCJ=JJ=GGGGCCCGGG=CG1G==CGGGCGCG==GGG(CGGGGGCGGCGG 
+@gi|10002|ref|NC_02.1|-1/2 +GAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTT ++ +CCC=GGGGGGGGGJCJJCGJGJJGGGJJJJCGJJJGCJCGGJCJGCJCGJJGGGJJGJC=CGGJ=JCCGGG1JGGCGGGGCCGGGCG18G=C8GC=GGCCGGCJC8CGGGGGGCGGG=GGGGGG=C1GGCGG(=CCCG=GCGC8CGGG=G +@gi|10003|ref|NC_03.1|-9/2 +CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCT ++ +CCCGGGGG=GGGGJG1JJCJJGJJJJJGJG1JGJCJG=GGJJ1CGGCGJGJJGJGGGJGGCGGGGG8GGCJGCJGGCGJCGGCGGGG1GGCGCGCG=CGGGC8JCJJGCG8CCCGGGGGGC=CCGG8GGGCGGGGC=GGCGGGGCGCCC= +@gi|10003|ref|NC_03.1|-7/2 +CATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCG ++ +C=CGCGGGGGGGGJGJJCG1JJJGGJGGGJGJJJG8GJJJJGC8=GJJGCJJJGCCGGGGCGGCCGCGCJ=GGCGGGGGG8CGG1GGG=C=CGGGG=GCCGCCJJCCG1GG=CCGCCGGCGGGGC8GCGGG=GGGCGCCG=1GCCGCGG1 +@gi|10003|ref|NC_03.1|-5/2 +CAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACGTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGT ++ +CCCCGGGGGGGGGCGJGJGJJJ1GJJJGGCJJJJCGJGJGGGGJGJJJCJGJJGJG=JGGJGGGCGGGGG8=GGG=JGG8CCGGGGCGGGCGCGGGCGC8=C=JJJJGCGGG=GGGCGGGGCCGGCC=GGCGGCGGCG=GGGGGCCGG=C +@gi|10003|ref|NC_03.1|-3/2 +CTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACG ++ +CCCGGCGGGGGGGJGJJJJGJCJJGJJJGGCJGJGCJJJJGGGJGJGGJGGGGCJJ=JGC=GGC==GG=JGGGG=GGGJGGCGCGG8GGCGGGG=GGGGGG81JJCJGGG8GG1GGGGCGC8G8GGGCCGGGGGCGGGCGGCCCC1GCG8 +@gi|10003|ref|NC_03.1|-1/2 +CTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAATATGACTTA ++ 
+=CCGGGGGGGGGGJG1GGJGJJCGJGJJC1GCJJGGJCJJJJGGJGGJGGJCJCGJGJCGJGGGCJJGJGGGGJC=CGGG8G1G=GGGC1G=GGGGCGGCGCJCCCJGGGC1GCGGGGG1GGCCCGGGCG1CG1GGC=CCGGCGGCCGGC +@gi|10004|ref|NC_04.1|-9/2 +TTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGGTAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAG ++ +CCCGGGGGGGGG=JJJJJJJJGJJGJCGCJJJJJGJJGJJJ=GJJGJJGGGGGGCJJGJJGGJCG(GCGGGCGJ=CCGCGCGGGCGGGCCGG8GGGGGGCGCCCCJJGCGCCGG8GGGGCGGGGGGGCG=CGCGGGCGCCCCG=CGGCCG +@gi|10004|ref|NC_04.1|-7/2 +GGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAG ++ +CC8GGGGGGGGGGGJJJJJGJ=GCJJCJJCJJJJGGGJJJCJJJCJGJGJCJ8GJGG=J=CJG=GJCJGJGJJGG8CGGGGGGGCGGGGGGGGCGGCCGGC=J=JJCGCGGGGC=CGG=CGGCG==GCCGCGGCGCGGCCGGGG=G8GG= +@gi|10004|ref|NC_04.1|-5/2 +GTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCCACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAA ++ +8CCGGGGGGG=G1GGJJJJJCJJGCJJ1JJCJJJCJJJJJGJJGCJGJJCJJCG=CCJC=JG=GCC=GCGGGGC=GCCGGGGGG=GGCG=CGCGGGGGCGGGCCCJJGGGCCCGGCGGGCGGGCCGGGC=GGGCC=G=GGGGGGGGCCGG +@gi|10004|ref|NC_04.1|-3/2 +GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGT ++ +CC=GGGGGGCG1GCJ=GC1GJCGCJGGJCJJJJJJCGJGJJCJGCJGGJGJGGJJ1CJGCGGCGGJGGGGGCGCG8G8GGGCCGCGGCGGGC=GGGGCCC=GJJ=CJGGGGGGGGGGGCGGGGGGG=GGGG=CGGCGGCCCGC=GC8GGC +@gi|10004|ref|NC_04.1|-1/2 +TTGCTTAGTAATGAAAGTCACAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGT ++ +CC=GG=GGGGGGGGJJG=J1JGGJGCGJGGJGJJJGJJGJG=JJGJGJJ=JGGJ=GJGCGGCGG=GJ=J=GCCGCGCGC=GGGGGGCGGGGGGCGGGGGGGCJ=(8JCGGCCGGCCCGGCCGG1GGGGGGCGCCGCCCGGCGC1GCC=GC +@gi|10005|ref|NC_05.1|-9/2 
+AGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACT ++ +CCCGGGGGCGGGGGJJJJGJJJJJGJJJJJJJJJJGCGJGJGJGJ=JGJJGGGJJJJJJGGGG=GCCGG=GJGGCCGGGJG1CCGC=GGGCCGGGG1GGGGCCCJJC8C=8==CCGGGGCCGGGGCGGGC=GGGG=1C=CCCGCCGCGCG +@gi|10005|ref|NC_05.1|-7/2 +TACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAG ++ +C8CG=GGGGG=GGJGJ1JGJJCGJCJ8GG=JJCGJCJGGGJGJJCJGCGJJCJ=GJJGGJG8GGG8JJGCCCGG=GGG1=CGCCGGCGGGG=GCGGCGGG=(JJJJJGGGGGGG=GGCC1GG=C=1CCGGGGGGGGGCC=G8GCGGGGCG +@gi|10005|ref|NC_05.1|-5/2 +AGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAG ++ +CCC1CGGGGGCGGG1JGGJJJJJJGJJJGJJJGJJJJJJ=CJJJCJGGCJGJGCCJC8GGGGGJ8CGG=CGCCG=CGCGG=CCGGGGGGGGCCCCGGG1GGCJ=CJJGCCCCGGGCGCGGCGGG1GGGCGCCG1CGGGCC1GGGGCGGGC +@gi|10005|ref|NC_05.1|-3/2 +TTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAACCAGGGG ++ +=CCGGGGGGGGGGJ1JJGJGJJJGJJJJGJGGJGJGGJJCJGJ1JJJJJJJGCGJ=CJGGC1GJGGCG=JGGCCGJGGGGCG=GGGGCGCGGCCGGGG=GGGC=JJ=GGGCGCGC=GGC=CG8C=GGGG1GGGGGGGGGGGGCGCGGGG= +@gi|10005|ref|NC_05.1|-1/2 +TGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATC ++ +1CCGGGGGCGGGGCJGJGJCJJJJG=JJ=JJCJJJJJJJJJGJJJJG1CJCGGGCJGJJGGGJG1G8JCGGG1GGGGGGGGGC=CC=1CGGGCGCCGCCGGGJJJCJ=8=GGG8GCGGCGGGGGCGCCCC==CGGGGCGGGGGCGCGCGC +@gi|10006|ref|NC_06.1|-9/2 +AAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTT ++ +CCCGCC=GGGGGGCGJJJGJJ1CJJJGJGGCJJJJJJJGJGCJJCJJCJJJGJ(GGJGGCJGCJCGGGGJGG=CGC8GCGCCCCCGGC=CG11GGCGGG1GCJJJJJGCCCGGGGGCG=GGGGGGC==GG=GCG1GGGCG=GCGG=CGGC 
+@gi|10006|ref|NC_06.1|-7/2 +AAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTC ++ +CCCGGGGGGGGGGCJJGJJJJGJJJJGJJ=J=JG1CJGJGGJJCCJCGGGJ8JJJJCJGG1GGGGGG=CCGGCCGCJGGCCGCG=CGCCCGCGGG=GC=GGC8JJJJGGGCGGGG=GCGGGC=1=G=GCC(GG8CGGCC=GGGGGCGGGC +@gi|10006|ref|NC_06.1|-5/2 +CAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACGCTTTGT ++ +CCCGCCGGCGGGGJ=JJGGJJG=GJJJJJ1JGGJJGJCJJJJ=CJCJJG(J(CJG=GCGGCGGGCGGJGGGCGGGG1C=CCGCCGGC=G=GCGCGGGCGC=CJJJ=CGCCGG1=GGG=GGC=GGCCG(GCCGGGGCGGGCGGG(GCGCC= +@gi|10006|ref|NC_06.1|-3/2 +TCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACT ++ +CCCGGGGGGGGGGJJJGGJJGGGCJJJJGJJJJGJJJGJJCJGGGGJJGJGJJJJJCGGGJGJGGJ1GCGCGCGGGGGCJGGGCGG=G=G=GC8GGGGG=CGCJ=CCGGGGGGGGG=GGCGGCGGGGGCCGGGCCGGGGGCCGGGGGGGC +@gi|10006|ref|NC_06.1|-1/2 +GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGT ++ +CCCGGCGGGGGGGJGGJJGGGGJJJJJJGJCJJGJJGJCJGJJJGC=C=GGGGJJCGGGGGJGG=8JGJCJGGGCG(CG8GGG=GCCCGGGGGCGGCGCCG=JJJJCG8G1GGGGGGGGGGCGGGGGGGG8GGGCGCGGCCGGGCGC=GG +@gi|10007|ref|NC_07.1|-9/2 +TACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCAC ++ +=CCCCGG1GGGGGJGJJJGGJGJ1JG1GJCJJGJCGGJGCCJCJGGGC8JGJ(GJGGJJGJGGCGGGGCJGCGGCGCGGCC88CCGGGGGCGCGGG===C(GJCJJ8GGGCGGCCCCG8GGCCGGGC=GCCGC1CGGCGG8CCGGG1GC8 +@gi|10007|ref|NC_07.1|-7/2 +TTTAAGTCATATTTGCTACGGTGACCCTACTACTAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCA ++ 
+CC1GGGGGGGGGCJJJ1JJGGJGGJCJ=GGJCJ(JJJGJ=GJJGJJ=JGJJJJ=CGCCGGGGGJGCGGGGGGCCCG8CGGCGGGGGGGG1GCG=CCCGGGC1JCCJCGCGGCGGC1CGG1GCCCCGGGC1GG1CGC=CGC=GGGCGGCGC +@gi|10007|ref|NC_07.1|-5/2 +TCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGT ++ +CCCGGGGGGGGGGGJJJGCJGCJJ1JJJJJJJCCCJGJGGGJGGCGJCGJ=JJJCJGJG=GCJGG8GGGJJJGCCGGG8CG=(GGCGCCCCGGCGGC=GGGCJJJJJGGGGGGGCGGGG===GGCGGGGCCGGCCG=G=GGGGCGGGGGG +@gi|10007|ref|NC_07.1|-3/2 +GCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCTTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGGAGTGA ++ +CCCGGGGGGGGGGJGJJJJJGJGJJCGGJGJJGGGJCGGJJ1JJGGGCJGJ=JJJJ=JC1C8GGG=CGGG==GJGCC=C(CG=CGGCGGCG(GGGGGGC=CCC1C1CG=GGCCGG=CGG=CG=GCGGGCGGGCCGGC1GCGGGG1CGCCG +@gi|10007|ref|NC_07.1|-1/2 +AGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTT ++ +CC=1GGGGGGGGGGGJGGJGJJJGJJGGJGGJGGJ=JCJJGJGG=GCGJJGGJ=JGJJJJG8CGGGGCJCJGCCGCGGGGGGGG=G=GGGGGGGGCG8GGCCJJJJJCCCGCCGGGCGCGGGGGGCGCCCGGGG=C8GCGC=GGGG81CC +@gi|10008|ref|NC_08.1|-9/2 +CAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCT ++ +=CCGGGGGGGGGGJGJJGCGJGJJJGJJCCGGJJJGJJJGGJGGGGCGJGJ=GGGGGJGG8GJGGGJGCGJGGCJCGCGGGCG8G8GC1GG8GGGGGGGGGGC=CCCGCGGG8GGGGCG1CG88GGGCGGGGCG1G=CGC=GCCGGGGCG +@gi|10008|ref|NC_08.1|-7/2 +GTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTG ++ +C=1GGGGGGGGGGJJJJJJGGCJJJC8GJJJJJJJCGJJJJJJGJJJGGGGJGJG==GG==GGGGCGGJGCG=CGGCG8GCGCGGGGGGCGGGCC8G(GGCGJJCJCGCCCGGGCCCGGGCGCGGGGCGGGGGGCGCGCGGGGGGCGCCG +@gi|10008|ref|NC_08.1|-5/2 
+TCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGC ++ +CCCGGGGCGGGCGGJ1JJJ=GGJJJJ8CJJJGJJJCGGGGJJGJ8CJJCJ(G8J1JG8JCGGG=CCCGGGGC=G(=JG8=G8GGGGCCG=GCG=CGCCCGGG=JJ=JGGGGCGCGGGG=C=GGGGGGGGGGGCGC1GCGG=C81GGGCGG +@gi|10008|ref|NC_08.1|-3/2 +TCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC ++ +CCCGGGGGCCGGGGGCGJGGJJJJJJJCGJCJJJGJ8JGCJJJGGJGGJJJJCCGCCCCGJGCGCGJC=CGGCG=GGGGC=GGCGCG=GGGG=GGC1GG=8GJCJ8JCGGGGCGGCGCGC1GG=CGCCCGGCGGGCGCGGGCGCGGGGG8 +@gi|10008|ref|NC_08.1|-1/2 +CCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGT ++ +CCCGGCGGG1GCGJJGJJGJJGG1JJ=CJJJGJJJGJGJJGGJCJGGGCGGJJGJCGJGJGC=GGGGGGCC8GC=GGG1G1CGGGG1CCCCGGCCCGGGG8GCJCJCG=CG=G=GGGGGGCGGGCG1GGGCGGGCGCGGGGCGGCGCGC= +@gi|10009|ref|NC_09.1|-9/2 +GAAGTGACACTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA ++ +CCCGCGGGG(=GGGGJGGJJJJJJJG(JJJJJJJJJJCCGCG=GJGJJGJC=G8JJCJJ=JGGCGGGCGGGG1GGGGGCGGGGGGCGGCGGG=GCGGGGCGCJJJJCGGGG=GGGGGGC8CG1CCCG==CCGGCGC8GGGCGGCCGGCC1 +@gi|10009|ref|NC_09.1|-7/2 +TAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGT ++ +CCCGCGGGGGGGGJGJG=GGJGGGJJGJGJJJGCGJJGJGJGJJGJ=GJGJJG88G1GJGJJ=CJGGGGG(GGGGCGJGGCGCGGCCGG1GGGGGGCGG18CCJCC8GCGCG=GCG=G1GGGGCGG88CG8CCG=CGC=GGGCGCGGCCC +@gi|10009|ref|NC_09.1|-5/2 +CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGG ++ +CCCGGGGGCGCGGJJJJJGJCJJG8J=GJJGJGJJJJGGJJGGGGJJJJCGCJCGJ=GCJGGGG1GGGGGGCJJGCJC=GGGGCGCCGGGGGG=GGG8GCG=CJ8J1GC=GGGG1=CCGGGCGGCCGGCCC=CGGGGGCGGCCGCCGCGC 
+@gi|10009|ref|NC_09.1|-3/2 +CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGAACCATAGATCCCTCT ++ +CCC1GGGGG=GGGGGJJJGG=GJJCGGGJGJGJCGJJJJGJC=JGG=CJGG8JGGJGJJCJGGGGGGGGGGCGJGGCGGCCCCGGGGGGGGGCGGGG8GGGC=JCJJGGGGG=CGGCGCGCGGGGCGCCGCGCCGCCCGGGGG1CGGCC= +@gi|10009|ref|NC_09.1|-1/2 +AGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAAC ++ +CCCGGGGGGGGG=JJJJJGJJGJJJJCJCJGJJJGJGJJGGGJGJJCJGGG=J8GGJJGJGJG=GGJJ8GCCC8GJGGGCC(C188GGG=GCGCGGGCGGCCJCJJCGC8GG8CG=C1G1=GCGGGGGC1CCGCGCGGG=1GG=G1CG8= +@gi|10010|ref|NC_10.1|-9/2 +AGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTA ++ +CCCGGGGGGGGGGJGJ=JJJGJC1G1CJ1JJGCJJJJGGJJCJJJJGJJGGJJJJ=G==GGJ=GGJGGCGJGGGGGCCG1G=JCGGGC=G1GC=CGGG8=C1JJJJJGG(GC=GGGGGC1GCGGCGCCCGCGG1C=GG1=GGCGGGGGGG +@gi|10010|ref|NC_10.1|-7/2 +CCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCT ++ +1CC=G1GGGGGCCJJCJGJJG1JJJGCJJJCJCGJJGJCJJCJJJJJCJGC=G8JJGJJJG1GJGJGGGGGCGCJCGGGG=GGCGGC8CCGGCCGG==GGG(CJCCCCGGGGCGGGCGGG8GCCCCGCCGCGGCGGGG=CG1C8GGGGGG +@gi|10010|ref|NC_10.1|-5/2 +GTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAG ++ +CCCGGGGGCGGGGJGJJJGJGGJJGCGJJJCGJJJJJJJJGCJCJJJGJ8JCGJJGJCJGJ8JGGG=GG8CJCG=GGGCGGCGCGGGGGGCGGC==GCGGCCCCCJC(GGGGG=GGGCG=GGGG1GGGG1GGG1G1GG8CGGGCGGCCGG +@gi|10010|ref|NC_10.1|-3/2 +TATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCA ++ 
+CCCGGGGGGGGGGJGJJGCGJJJGJGJGC=CGJGGJGJGJJGJ(1CJGGJGGGGGJG==JCC1JGCG(C(GGGGGGGCG=GCCGGGCC8G=CG==CCGCGG(CCJJJC=GC=GGGGGGGGCGGG1GCCGGGGG81CGGCGCGG8CGGGCC +@gi|10010|ref|NC_10.1|-1/2 +GCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCA ++ +CCCGGGCGGGGGGGJCG1JJCJJCJ=CCJCJGJGCJJCCJ1JJG=JGGJGCJ=CJJCJ8G(JJG=88GCCCCGCGJGGCGCGCCGCCGCCGCCGGCGG=G1C1JJJJGGCCGGG=GGGGGGC=GGGCCGCCGGCGCGGGCGGGGG=CCGG +@gi|10011|ref|NC_11.1|-9/2 +TGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCA ++ +CCCGGGGG=GGGGJGJGGGJJJJJJJJJJJJCJJJJJGJJGJGJGGGJJJJGGGJGCJJ8GGCGG8GGG==G=CGGGCCGGGGCGGGGGGGGCGCGGG=CGC=JJJCCGGGGGG=CCGGGGGGGGGCGGG(CG=GCGCGGGCCGGCG8CG +@gi|10011|ref|NC_11.1|-7/2 +TCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTGGCCGCATGTAGACATTT ++ +CCCGGG1GGGGGGCJ=JGJJCGJGJJJJJJ8JGJGJJGJ=JGGJJJJGCGJCGGGGCC1=C8JGJ==GCJGCGGG8GCCGGC1CGGGGGG=GGGCG=CCGCGJJJCJCGCCCCCG=GC8CC=G8=CGCGGGGGCGCCCGGGG=GCGGGGG +@gi|10011|ref|NC_11.1|-5/2 +ACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCCTTGTTTCGCTCATCGGAGTAATTTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTG ++ +8=CGGGGGGGGGGJJGJJGCJJGG=JJJJJGJJJJGJJGG=C(JJGJCGCC8JGGJ(CGJJJJGGGCJCGGGGGJCGJC=CG=G=GG=GGGGCGCGGGGGGG=JC=JC==CCGGCGG=GGGGGGGCC=GGGGGG=C8GG=GGCGCGCCG1 +@gi|10011|ref|NC_11.1|-3/2 +CTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAA ++ +CCCGGGGGGGGGGJJJJJJJJJGGJJJJJGJC=JGJJGGJJJJGGCJJJGJGCJJGJGCGGGGGGJGCCJGJGGCGG=GGCGGGCG=C=CGGGCGGC=GG8CCJ(JJGGCC=GCGCGGCGG8GGCCCGCGCCCG1CCG=GGC8GCGGCC= +@gi|10011|ref|NC_11.1|-1/2 
+ATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGACACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCCCTCGAATAGCC ++ +CCCGGGGGGGGG1GJCJJJGGCJGJGJGJJGGGJJGJGCJJJJJJG=G8JJG=CGJGGCCCGGGCGGCGGJGGGGGCG(CGGGGG8GCGGGG1=CCGGGCGCJC8JJC8GCGGGCCGGGCGCGGCCGG=C8GGGCGC=GCGC1CGCGGCC +@gi|10012|ref|NC_12.1|-9/2 +GGCGTATACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTCGAATAGCCGCCGGA ++ +1CCGG1GGGGGGGJJJJGJJJCJJGJJGJGJJGJ(CJJJ8GGJGJJJJJJCGJ8CCGGGCJJGGGGGGC=GGG=GGGGCGGCG=CCCGGGCGGG=CGGGGGCJ=CJJ8CGGGGGGCG1CGGGGCCGG1=GGGGGGCGCCCGGGGCGCGGG +@gi|10012|ref|NC_12.1|-7/2 +GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTAA ++ +=CCGGGCCGGGGGJJJGJCJGJJJJGGJGGJGGJJGGJGCCJJJGJGJJGJCCG8GJCG=GCGG8=JGJGGJC=GG=GGGCCGGGGCGCGGG1GC=GCGGG8JJCCJCCC1GCGCGGGG=CGGCGCCGGGGCGGC=GGC8=CGGCGG=CG +@gi|10012|ref|NC_12.1|-5/2 +TGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGATACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTG ++ +CCCGGGGGGGGGG=JJ8JJJJGCGJGJJJGJJJCJCGG(GJGJGGGJGJJGGJGGGJCJGGJJJJG1CG8GGGCJ1GGGCGGGGCC==GGGGGGGG=GGGGCCCJJC=GGG=G=GCCGCGGGGGCGCGG=CCGGG=CCCG=GGGGGC=GC +@gi|10012|ref|NC_12.1|-3/2 +TACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGC ++ +CC=GGGGCGGG=GJJJJGJJJGCGGJGGJJJGGJJJJCJGGGGJ8=GJGGGJGGJJGCJC8GGCJGGJGCCGGGGGGG==8GGGCGCGGCGGGC8GGGG==GJJCC=C=CCCGGG=G=GCGGC==CGGG=GGGGGCGGC=GGCGG(GCCC +@gi|10012|ref|NC_12.1|-1/2 +AAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCGGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTT ++ +CCC=GGCGGGG8G=JJGJGJCJJJJJJJJJJJJJ1JGCGJJJJJJGJ18JJCGCJGGGGC=(=GJCCCCGGJCCG1GG8GCGGGGGCG(GG=GGGGGGG=GC=JJJJCGGGGGGGGCGGCGGCGCG=CGGCCGC=GCGGGGGCGCGGGGG 
+@gi|10013|ref|NC_13.1|-9/2 +GAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA ++ +C=CGG=CG1GGGG=JCJGJJCGJGJJ1J1CGJGGGJGJJGGGGGGJGJJGG8JJCCGC8GGGGGGJ8GGG(G1CCCGG8GCCGGCC=GG=CGGGCGGCG=GCJJJ=CGGCGGGGGGGGGC1GGGG=CGCGGCGCCGGGCC1=CGG=GGGG +@gi|10013|ref|NC_13.1|-7/2 +AATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATC ++ +CCCGGGGGGGGGGJJJJJCG1JGGJGJJJJJ1GGGJJ(JGJJGGGJGJJGCJJG8JJJGCGJCJCGGGGGC1GGGGCGGCCGGCCG=GGGCGCCCGGGCCGCCJJJJCGCCGGCC=CC==GCG(GGGGGGGC=CCGGG8GGCGGGGGG=1 +@gi|10013|ref|NC_13.1|-5/2 +GTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCG ++ +CCCGGGCGGGGGGG=(GGJJJJJGGGGCJGJJJJJCGJ1CGJJJJJJG=JCJCJ=GJGGJGJGGJGGCGGGGG8GGGCGG81GCGGGG=8CGGGGGGCCG8CCJJJ=CCG=CGGGGG=CGCGCCCG1GGCG1G=GCGGGCGGGGG=C=CC +@gi|10013|ref|NC_13.1|-3/2 +GAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGC ++ +C=CGGGGG1GGGGGGJJ=JJJJCJCJJJJGJJGJJJC1(CGJGCJJJGJJJGGJ8GGGGJGJJ=GGGGGGGGGJ1CGGGCGGGCGGGGCCGGCG1GGGCCGGCJJCCGCGGGCCCGC8GCCCCGGCCG=C8GCCG=GGGGGCGGGGGGGC +@gi|10013|ref|NC_13.1|-1/2 +TGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGG ++ +CCCGGGGGGGGGGGJG=JJ=CJGJGGGG=GJJJGCGGJJJJGJCGGGGGCGGJGCJJG8JGGGGCGJCCGJ8GGC8JGGCGGGCGGCCCCCGGGG=GGGGGCJJJ=JGG==CGGGGGGGG==G=1GCGGGGCGGGGGGGCGGGGG=CCGG +@gi|10014|ref|NC_14.1|-9/2 +GTAGCCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGC ++ 
+CCCG1GGGGGG1GGJJJJJJGJJJJJJJJCJGGJGGJJGJGJJJJGJJ1CGGJCJG8GCGGGJJG=GJJGJCCG8=CGGGG=GGCGCCCGGGCGGGGCGGGCJJJJ=GGGCGGGGCGGGGCCGCGC1GGGGCCGCGCCG=CGGCGCGGGG +@gi|10014|ref|NC_14.1|-7/2 +CACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCA ++ +CCCGGGGGGGGGGGGGJCJJ(GCJ=J1JGJGJJ1GGJJJ=J=JJGCJJCJJCGJJGGJJGC=GJ=GCJGG1CGG==G=G=(GCCGGGGGGGGGGGGCCGGGG=JJCJGCGGCGGGCGCCGGGGGGCCGGGGGGGCCGGGCGCGCG=CGCC +@gi|10014|ref|NC_14.1|-5/2 +TCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCCGACCCATAGATC ++ +CCCGGGCGGGCGGJJG1JJ1JJJJJJJJGJGJGGGJJGGCJGJJGGJGCJGJGJJGJGGGCGCGJGGGGJGGCC1GGGGGGCGGCCCCGGGGCGGCG=GGGCJCJJJGGGGCCCCGGCGCG8CCGGCGGC=GGGCGGCCG=GGC==GGGC +@gi|10014|ref|NC_14.1|-3/2 +ATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTAC ++ +8CC=GCCGGGCGGJJJJJGJJJJGJGJJJJJJGJJ8J1JGGJJJJGGGCJCJCGCJJJGGGCGJ8GJ1GGGGG=GGG=GGG1GGGGGGCCC=GGGGCGGGGGCJJCCCC1G=8CGGGGCC=CGG=C=CCGG=GGGCGGG8GG=G8=G8GG +@gi|10014|ref|NC_14.1|-1/2 +TTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCC ++ +CCCGGCGG=GGGCGGGCJGJJCGJJJJJCG=J(JGJ8GJCJGGJGJJGGGCGGG=JGJJG=JC=GGGGGGGCGCJCJJGCCGGGCGGGGG(GCCGGGGGG8CJJJJCGG=188GGCCGGGGGG8CCGGGGCGCGGGGGGGGGGGCCCGGC +@gi|10015|ref|NC_15.1|-9/2 +AATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGG ++ +CCCGGGGGGGG=GJJCCJJCJGGJJJJJGJGCJCC=GGGJJJJJ=JJCCJGGGGJJJGGC8GGGJGGCGJCGG=CGGCGGCCCGGCCGGGGGGGGCC1GG(GCJJJCGGCCCCGGG=(CGGGGCGCGGCCGC=CGG(CGGGGCGGGGGG= +@gi|10015|ref|NC_15.1|-7/2 
+TATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGTAGTAATGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGT ++ +CC1GGGGGGGGGGGJJJG=JGG1CJCJJJJJJCJJJ8CGJG(JJGGJ8JJGJJGJGGG(=GGGJGCGGCGCGGCGGGCGCGCCGGCGCGGGG=GCG=GGCCCJJJJJ=GGGG=GGGGGCGCGCGGGGGGGGGCGGGGGGCC=GCGGCGCC +@gi|10015|ref|NC_15.1|-5/2 +CCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTA ++ +CCCGGGGGGGCGGJJGGJCJJJJJG1JJJ==JJJJJJGGJJG(GJ(JCJJJGGJCJGJGCJGCG8JCGCGCGGCCGCGGCGGGGCGCGGGCC=GGCGGGGCGCCCJJGGGGGCC(G8CGG8GGCCGCGGGCGGGG1GGGCCCGGCGCCCG +@gi|10015|ref|NC_15.1|-3/2 +GACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCAT ++ +CCCGGGGGC(GGGJJGJCCJJJGJGJJJJJGJJJJGGG1GGJJJCJCJJGGGCGJGGGCJCGJ=8JGCCGGGJGG=CGGG=GCCGCGGGGGGGG1GGGGGGCCJ==8CGGC8GGC=G=GC=GCGGCCGG1GGGCGGCGGCC=C=GGCCG= +@gi|10015|ref|NC_15.1|-1/2 +ACCTGGTTTAAGTCATATTGGCTACGATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAAC ++ +CCCGGGGGGGGGGCGJJGJJJCJCCJCGJJGGJJCJGJJJ=GJGGC=JGJGCJCG8GGGGJJGGGGC=GCGGGG(GG=GCGCCGCGGGGCCGCGGGGG(8GGJCJJCGGGGG=1GGGGCGGGGGG1G=GG=GGG8CCCGG=CCCCGGGCG +@gi|10016|ref|NC_16.1|-9/2 +GCGGCCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCAT ++ +CCCGGGGGCG=GGGJJJGGJ=JG(JCJJJJJJJGJJJGJ8JJJJJGJGJJGJJGJGJG8GGJGJCCJ=GG=GJGCGJGCGGGCC=GGGGGGGGGGGCG8GG=CJJJJGCGCGCGCCCC=G=GGGGCGGGGGG==GGGGGGCGGGCCGGG8 +@gi|10016|ref|NC_16.1|-7/2 +AAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACT ++ +CCCCGC1GGGGGGJGJJJJ1JJCJJJJJJGJJJG=JG=GJJGGGCJJJGGJGGJCJGCGCGCCCGCGGGJGGCGCCGG==CGGGGGGGG=GCCGGGCCGGC1CCJJJGCGGGGCCG(GGGGGCCGG=GGGCCGGGGCCGGCGCGGGGGCC 
+@gi|10016|ref|NC_16.1|-5/2 +TCTGGGTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGATAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCCGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTT ++ +CCCGG1GGCGGGCJJG1GJGJJGJCJJGJJCJC=JJ=GJGGJJG=CJ=GGGJGGJGGG8JCCGGCCGCGGGJ==GCCCCGGGGCCC8GG=GGCC(CCGGGGGJJ=CJCGG1GGGGGGGC1GGGC=GGCG=CG==GCC=GG=GCGGCCGCC +@gi|10016|ref|NC_16.1|-3/2 +GGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAG ++ +CCCGGGGGGGGGGJJJJCJJGJGJJCCJJJJJJJGJGJJJCJGJGGGCG8JJJJGJJGCG==GJGGGJGGGGGCCGGG=CGGGGGCCGGG=GGGGCCCCGG(=CCCCG=CCG11CGCGCCGC1GGGGCGGGGGGGCCCCCGCCGCG=GGC +@gi|10016|ref|NC_16.1|-1/2 +AATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTG ++ +=CCGGGGCGGGGGJGJJJJJJJJJCCJJGJJJJJJJGJJJJGJJGJ=JGJCGJC1JCJGGJGJCJG8CG8=CCGGGGGCCGGGCGGCGGGCCCCGC8GGCGCCJJCJCCGGGCG=GCCGGG=CCCGCCCGGGCCGGGC1GCGGGCGGGGC +@gi|10017|ref|NC_17.1|-9/2 +TGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAA ++ +CCCGG1GGG(GGGCGJJJGJJGJGJGJJJJJGGCGJCG8JGGGGGJ8JGGJJGJ8CCGG=CGCGGGJGGGCCCGGGCGGG8CCG=CGCGCC=GGG==CGGCG(CCJJGCC8GGCGGGG=G8GGG1CG=GGGGCGGGGG1G==CGGCGC8G +@gi|10017|ref|NC_17.1|-7/2 +ATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTG ++ +CCCG=GGGCGGGGJGJJJ1JGGJJGJGJJJGJJJJJJ=GJJJGJJJGCGGJJJGGCGJGJGJGJJJGGJGGJGGGGCGGGCCCGGG88GGG8GC1C=GG1GCJCJCJCCCGCCCC=CCCGGGGGCGGCGGCG8C=GGGCGG=8CCCCGGC +@gi|10017|ref|NC_17.1|-5/2 +AGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGG ++ 
+CC=CG1GCGCGGGJGJJJJJJJJJCJGGG=8JJCCJC=JJCGGCJJJJGGGJ8GGCJGGCGGCGGCJGCGCGGGGG(GCCGGCGCG8CCGGGGGCCGGG8G=CCJ=JGGCGGC8GGGGGGGCGGGGGGGGG=8=CGCCCGGCCGCGCGC= +@gi|10017|ref|NC_17.1|-3/2 +CATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGT ++ +CCC=GGGCGGGGCGCJJJJJGJJJJCJJGJJJGJJG=JGJJJGJJJGGGC=JG=GCCGJGGGCG=CJ=GC8GGGGG(GCG1C8GCGGGGGGCGCG8CGCG8=CJJJJGGG=G1CCCGGGGGGGGGGGCGGCGGGGGCC=G=CG=CGG8GC +@gi|10017|ref|NC_17.1|-1/2 +CAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAAT ++ +CCCG=CGGGGGGCGJJJJJJJJJJGJJCJJ1JGGJGGGJJJJJJCJGJCGGJJGJCGGC1JGGCGGGJGJGGGGCJCGGGJGGGCC=GGCGGGCGCGG=GGGCJJCJGGCCGCGGGG(CCCGGGGCCGCCGC=GGGCGCC1GCCG=GG=8 +@gi|10018|ref|NC_18.1|-9/2 +GGTGACCCTAATACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTT ++ +CCCGGGGGGG8GGJJJJJGJCJJJJJJCJGJGGJCJGJJJJJJGJG=JJGC8JJ8JGJJGCGGGCJCGGGGCGCGGGGGCGCCGGCGGCCGGGGG=GCG=(GCC8JJGGGGGG=CGGGCGC=CGG18=CGCCGCGGCCGG=GGGGCGC81 +@gi|10018|ref|NC_18.1|-7/2 +AAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTG ++ +CCCGGCGGGGGGGJJJJJGJJJJCGJGJ=JGGGJJJJJJJGGJJJGGGJGCJGJJJJCGJJG=CCJJ8GG1JGGGCJCGGGGGCGG=GGGCG=CGCCGCG1CJJ=JJ=GCGGCGCGGCCCGGGGGGC=CG8CGGGGGG=GC=GCGGCCGC +@gi|10018|ref|NC_18.1|-5/2 +AAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTT ++ +CCCGGGGGGGCGCG=GJJG8JJJJJJGGGJGGJJCJCGCCCC=GGCGJCJJ=GJGGGJGJGJCGGG8GGCGGCGGGGCGJ1G8CCGGGGCGC=GGGCGCGGGC=JJJGGCGGGG=GCGGGGCG=CGGCCGCCGGCCCGGGCGGGCGGCGC +@gi|10018|ref|NC_18.1|-3/2 
+ACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCCTACTACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACG ++ +CCCGGGGGGGGGGJCJJGGJJJJ=JJJJJJ8JJJGJGJJGGGJJCJGJJJJGGCG8JCJJGJGGG(JGGG1G(GG=GCGC1GGGGGGGGGCCCGGGGGCGGCCCJCJCGCGGGG1GCGCG8CCCCG=CGCGCGCGGCCGGGGCGCCCGGC +@gi|10018|ref|NC_18.1|-1/2 +GAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCT ++ +CCCGGGGGGGGGGJJGJGJJJJJJJJJJCJJJJJGGGG=CGJGJGCCGGJC=GJGJGGGGCCGJJGCCCCCGJGGGGGGCGGCGGGGCGCGGGCGG8GGC8GJJJCCGGGGGG=GC1CGGGCGCGCGG=CGCCGCGGGCGGCCCCGGC=8 +@gi|10019|ref|NC_19.1|-9/2 +ACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCGGCAGTA ++ +CC=GGGGGGCGGGGJJJJGC=CJCJJGJJJGJGGJJJ(JCCJCJ1JGJCJJGJGGGGJ=GGC1JGGGJGC=GJCGJG1G=GCCGGGG=GGCCCCCGGGGGGG=JJJJGGCGCGCGCGGGCGGGGCGG=CCGC=GGGCGG8CGCGGCCGGC +@gi|10019|ref|NC_19.1|-7/2 +AGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTA ++ +CC=GGGG=CGGGGJJGGJJGJJJ(JJJ1JJCJJGGJJCJ1CCGGJJJJJJCGC=GGG(GCGJJGJ=CJCGJCJCGG1GGG=GCCGGGCGGGCCCGCC1CGG=JJCJJCGG1GCCCGG=1GGGGGGGC(GCGGGCGGGG=GGCGCCG1CG8 +@gi|10019|ref|NC_19.1|-5/2 +GAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTAGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATAGCCCCC ++ +CCCGGGGGG=GCCCGJGJJJJJJGCJGGJJJJJJGJGJGJJJJ1JJJGGGJJJGG(=JCJC=GCGJJJJGGGCGGCGGC8CC8CGCCG=GGCGCGCCGCCGG==CJCGGGG8GGCGCGGCGCGGGCG1GGCGGGG=CGG=GGCCGGGGGC +@gi|10019|ref|NC_19.1|-3/2 +GCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCCTTGGATGCGAGATAGAAACCAGAGTCACCCT ++ +CCCGGGGGGGGGGJG8CJ=1JGJJGJGJJJJJJGCGGGJJC(JJJCJJJJJGCGJGCCCJGJGC8GGCCCCCCJ8G=GGGGC=GGGCGCGGGGCGG1GGCGGC8JCJGCGG8GGGCGC(GGGGC=GG=GGCGCGGCGGCCCGCGGC=CG8 
+@gi|10019|ref|NC_19.1|-1/2 +GCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTC ++ +CCCGGG=CGGGGGJGCJJJJ=GJCGJJJGJJJJGCCGGGJJJJJGJJJJJGJCGJGCGCGCJG=CGCGGGGGCCCG=CCG(GCCG=G=GGGGGGGCCCG(GCCJJJJGGGCGGG8GGGGGGCCGCCGGGGGC81CGGGCGGGCCCCG1CG +@gi|10020|ref|NC_20.1|-9/2 +GGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAA ++ +C81GGGGGGGGG(CGCGJ1JJJJCJ=JGGJCJJCGJJJJJJJJJ(C=GJ=CJJJJGJGJ=CCGCC8JCGGJGCGGCJGGGGGCGC=CCCCCGCGGGGGGGGCCCJCJGGGCGGGCGGGC=GGGGGGGG1CCGGGCCGGGGGGC=GG1C8= +@gi|10020|ref|NC_20.1|-7/2 +TTGCTAGAGTTGGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGT ++ +CCCGCGG1CGGGC=JCJJJJJGJJGGJGJGGJGJJGGGGGGG8GGJGC=CJGGGGGGGJ=GGJJJGC1GGCGGCC=JGC((=GCG=GCGGCCCC=CGCC=GCJJJCJG=1G==CGGGCGGCC=GGCG1G1=CG8GCCGGCGGGG1GCCCC +@gi|10020|ref|NC_20.1|-5/2 +TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAATATG ++ +CCCGG==GGGGGGGCJJJJJJJJGJJ1JJJ=JJJGJJGJJJGJGJCJG=CJJGGJ1GJGCJGGCG=C(G1G=CGGGGCCGCCCGGCGGCGCGCGCGGCGCCGCJCJJGGGGCGGCGG=G=GGGGGCCGGGGGGCGCG8=GGGCGCCCCCC +@gi|10020|ref|NC_20.1|-3/2 +ATCGAAACAAAGAGTCGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAG ++ +C1CG=GGG=GGGGCJ1JJGCJC=GGJJGJGGJJGJJGJJGCJGJJGJGJ1JGCJJGGGJCJJGCCCGGJGG==CJGGGCGGG1GGGG=CCCGC=CGCGGCCCCCJCCC=GGGGGGCGCCGGGGGG8CGG==CGGGGGGGGGGGGGGCCGC +@gi|10020|ref|NC_20.1|-1/2 +GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACT ++ 
+C=CGGGGGGGGG=JJJCJJ=JJJ(GJGJJJJJJ=JJJJCGGJJJJJGJGGJGJGGCGG=J=GJGJGGGCGGGGGCCGGGGCGCG=G=GGCG=GGGGCCCGG1CCJJJ=CG1G(CG=GGGGGCGGGCCGGGCCGGG=1CCCGCGGGCG=GC