From 115c01abcfe01bfb7a7a03317b32ca6d88d84e69 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Mon, 18 Nov 2024 10:24:00 -0500 Subject: [PATCH] assert written seq strings contain only valid chars --- impl/fasta_chunk.c | 7 +++++++ impl/fasta_extract.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/impl/fasta_chunk.c b/impl/fasta_chunk.c index a11c450..7379909 100644 --- a/impl/fasta_chunk.c +++ b/impl/fasta_chunk.c @@ -11,6 +11,7 @@ #include #include #include +#include <ctype.h> #include "bioioC.h" #include "commonC.h" #include "sonLib.h" @@ -85,6 +86,12 @@ static void processSequenceToChunk(void* dest, const char *fastaHeader, const ch char *seq_chunk = stString_getSubString(sequence, i, j-i); assert(strlen(seq_chunk) == j - i); + // sanity check + for (int64_t k = 0; k < j-i; ++k) { + char c = tolower(seq_chunk[k]); + assert(c == 'a' || c == 'c' || c == 'g' || c == 't' || c == 'n'); + } + // print the sequence to the file fprintf(chunkFileHandle, "%s\n", seq_chunk); free(seq_chunk); // cleanup the fragment diff --git a/impl/fasta_extract.c b/impl/fasta_extract.c index 17594fe..3929c90 100644 --- a/impl/fasta_extract.c +++ b/impl/fasta_extract.c @@ -10,6 +10,7 @@ #include #include #include +#include <ctype.h> #include "bioioC.h" #include "commonC.h" #include "sonLib.h" @@ -37,6 +38,11 @@ static void report_interval(FILE *output, char *seq_name, int64_t start, int64_t int64_t seq_length = (int64_t)stHash_search(sequenceLengths, seq_name); assert(0 <= start); assert(start <= end); assert(end <= seq_length); char *s = stString_getSubString(sequence, start, end-start); + // sanity check + for (int64_t i = 0; i < end-start; ++i) { + char c = tolower(s[i]); + assert(c == 'a' || c == 'c' || c == 'g' || c == 't' || c == 'n'); + } fprintf(output, ">%s|%" PRIi64 "|%" PRIi64 "\n%s\n", seq_name, seq_length, start, s); free(s); }