Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

try to orient bar jobs on reference to left-align downstream #1248

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions api/impl/cactus_params_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,36 @@ double cactusParams_get_float(CactusParams *p, int num, ...) {
va_end(args);
return j;
}

static bool cactusParams_has_attr2(CactusParams *p, int num, va_list *args) {
va_list args2;
va_copy(args2, *args); // to use the variable args we must copy it - see https://wiki.sei.cmu.edu/confluence/display/c/MSC39-C.+Do+not+call+va_arg%28%29+on+a+va_list+that+has+an+indeterminate+value
xmlNodePtr c = get_descendant_node(p->cur, num-1, &args2);

if(c == NULL) {
return false;
}
const char *attribute_name = NULL;
for (int64_t i = 0; i < num; i++) { // Loop to discard the earlier strings in the input
attribute_name = va_arg(args2, char *);
}
char *v = (char *)xmlGetProp(c, (const xmlChar *)attribute_name);

if(v == NULL) {
return false;
}

va_end(args2);

return true;

}

bool cactusParams_has_attr(CactusParams *p, int num, ...) {
va_list args;
bool ret;
va_start(args, num);
ret = cactusParams_has_attr2(p, num, &args);
va_end(args);
return ret;
}
7 changes: 6 additions & 1 deletion api/inc/cactus_params_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void cactusParams_set_root(CactusParams *p, int num, ...);
/*
* Get a string parameter.
*/
char *cactusParams_get_string(CactusParams *p, int, ...);
char *cactusParams_get_string(CactusParams *p, int num, ...);

/*
* Get an integer parameter.
Expand All @@ -56,4 +56,9 @@ double cactusParams_get_float(CactusParams *p, int, ...);
*/
int64_t *cactusParams_get_ints(CactusParams *p, int64_t *length, int, ...);

/*
* Check if attribute exists
*/
bool cactusParams_has_attr(CactusParams *p, int, ...);

#endif /* ST_CACTUS_PARAMS_PARSER_H_ */
11 changes: 10 additions & 1 deletion bar/impl/bar.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ void bar(stList *flowers, CactusParams *params, CactusDisk *cactusDisk, stList *
fa->minimumDegree = cactusParams_get_int(params, 2, "bar", "minimumBlockDegree");
fa->minimumNumberOfSpecies = cactusParams_get_int(params, 2, "bar", "minimumNumberOfSpecies");
fa->flower = flower;
// This option uses orients bar alignments so they are forward on the given event (to left-align in output)
Name forwardEventName = NULL_NAME;
if (cactusParams_has_attr(params, 3, "bar", "poa", "referenceEventName")) {
char *forwardEventHeader = cactusParams_get_string(params, 3, "bar", "poa", "referenceEventName");
Event *forwardEvent = eventTree_getEventByHeader(flower_getEventTree(flower), forwardEventHeader);
assert(forwardEvent != NULL);
forwardEventName = event_getName(forwardEvent);
free(forwardEventHeader);
}

void *alignments;
if (usePoa) {
Expand All @@ -101,7 +110,7 @@ void bar(stList *flowers, CactusParams *params, CactusDisk *cactusDisk, stList *
*
* It does not use any precomputed alignments, if they are provided they will be ignored
*/
alignments = make_flower_alignment_poa(flower, maximumLength, poaWindow, maskFilter, poaParameters);
alignments = make_flower_alignment_poa(flower, maximumLength, poaWindow, maskFilter, poaParameters, forwardEventName);
st_logDebug("Created the poa alignments: %" PRIi64 " poa alignment blocks for flower\n", stList_length(alignments));
} else {
alignments = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength,
Expand Down
52 changes: 46 additions & 6 deletions bar/impl/flowerAligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,21 +459,61 @@ int64_t getMaxAdjacencyLength(Flower *flower) {
return maxAdjacencyLength;
}

End *getDominantEnd(Flower *flower) {
//return NULL;
End *getDominantEnd(Flower *flower, Name refEventName, bool* outIsFlipped) {
assert(flower_getGroupNumber(flower) <= 1);
Flower_EndIterator *endIt = flower_getEndIterator(flower);
End *end;
int64_t maxInstanceNumber = 0;
// Get the end with the largest number of caps
End *dominantEnd = NULL;
// break ties with the largest number of forward sequences (prioritizing the refEventName)
int64_t dominantForwardRefBases = 0;
int64_t dominantReverseRefBases = 0;
int64_t dominantForwardCount = 0;
while ((end = flower_getNextEnd(endIt)) != NULL) {
if (end_getInstanceNumber(end) > maxInstanceNumber) {
maxInstanceNumber = end_getInstanceNumber(end);
dominantEnd = end;
int64_t endInstanceNumber = end_getInstanceNumber(end);
if (endInstanceNumber >= maxInstanceNumber) {
// compute the stranding information
End_InstanceIterator *endIt = end_getInstanceIterator(end);
Cap *cap;
int64_t forwardRefBases = 0;
int64_t reverseRefBases = 0;
int64_t forwardCount = 0;
while ((cap = end_getNext(endIt)) != NULL) {
Cap *adj_cap = cap_getAdjacency(cap);
if (adj_cap != NULL) {
int64_t cap_coord = cap_getCoordinate(cap);
int64_t adj_coord = cap_getCoordinate(adj_cap);
Event *event = cap_getEvent(cap);
bool is_ref = refEventName != NULL_NAME && event != NULL && event_getName(event) == refEventName;
if (cap_coord < adj_coord) {
++forwardCount;
if (is_ref) {
forwardRefBases += adj_coord - cap_coord;
}
} else if (cap_coord > adj_coord && is_ref) {
reverseRefBases += cap_coord - adj_coord;
}
}
}
end_destructInstanceIterator(endIt);
if (endInstanceNumber > maxInstanceNumber ||
(endInstanceNumber == maxInstanceNumber &&
(forwardRefBases > dominantForwardRefBases ||
(forwardRefBases == dominantForwardRefBases && forwardCount > dominantForwardCount)))) {
maxInstanceNumber = endInstanceNumber;
dominantEnd = end;
dominantForwardRefBases = forwardRefBases;
dominantReverseRefBases = reverseRefBases;
dominantForwardCount = forwardCount;
}
}
}

flower_destructEndIterator(endIt);
if (outIsFlipped != NULL) {
*outIsFlipped = dominantReverseRefBases > dominantForwardRefBases;
}
if (dominantEnd == NULL) { // This will only be true if there are no ends or only ends with no caps
return NULL;
}
Expand Down Expand Up @@ -504,7 +544,7 @@ static stSortedSet *getEndsToAlign(Flower *flower, int64_t maxSequenceLength) {
* Gets a set of the ends that we need to construct actual alignments for.
*/
stSortedSet *endsToAlign = stSortedSet_construct();
End *dominantEnd = getDominantEnd(flower); //an end to which all adjacencies are incident
End *dominantEnd = getDominantEnd(flower, NULL_NAME, NULL); //an end to which all adjacencies are incident
if (dominantEnd != NULL && getMaxAdjacencyLength(flower) <= 2 * maxSequenceLength) {
stSortedSet_insert(endsToAlign, dominantEnd);
} else {
Expand Down
31 changes: 29 additions & 2 deletions bar/impl/poaBarAligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -1094,8 +1094,9 @@ int64_t getMaxSequenceLength(End *end) {
}

stList *make_flower_alignment_poa(Flower *flower, int64_t max_seq_length, int64_t window_size, int64_t mask_filter,
abpoa_para_t * poa_parameters) {
End *dominantEnd = getDominantEnd(flower);
abpoa_para_t * poa_parameters, Name forward_event_name) {
bool is_flipped = false;
End *dominantEnd = getDominantEnd(flower, forward_event_name, &is_flipped);
int64_t seq_no = dominantEnd != NULL ? end_getInstanceNumber(dominantEnd) : -1;
if(dominantEnd != NULL && getMaxSequenceLength(dominantEnd) < max_seq_length) {
/*
Expand All @@ -1110,8 +1111,34 @@ stList *make_flower_alignment_poa(Flower *flower, int64_t max_seq_length, int64_
Cap *indices_to_caps[seq_no];

get_end_sequences(dominantEnd, end_strings, end_string_lengths, overlaps, indices_to_caps, max_seq_length, mask_filter);

if (is_flipped) {
for (int64_t row = 0; row < seq_no; ++row) {
// todo: in place reverse complement
char *flipped_string = stString_reverseComplementString(end_strings[row]);
free(end_strings[row]);
end_strings[row] = flipped_string;
}
}

Msa *msa = msa_make_partial_order_alignment(end_strings, end_string_lengths, seq_no, window_size, poa_parameters);

if (is_flipped) {
for (int64_t row = 0; row < seq_no; ++row) {
// todo: in place reverse complement
char *flipped_string = stString_reverseComplementString(msa->seqs[row]);
free(msa->seqs[row]);
msa->seqs[row] = flipped_string;
uint8_t *flipped_row = st_malloc(sizeof(uint8_t*) * msa->column_no);
for (int64_t col = 0; col < msa->column_no; ++col) {
char c = stString_reverseComplementChar(msa_to_base(msa->seqs[row][col]));
flipped_row[msa->column_no - 1 - col] = msa_to_byte(c);
}
free(msa->seqs[row]);
msa->seqs[row] = flipped_row;
}
}

//Now convert to set of alignment blocks
stList *alignment_blocks = stList_construct3(0, (void (*)(void *))alignmentBlock_destruct);
create_alignment_blocks(msa, indices_to_caps, alignment_blocks);
Expand Down
4 changes: 3 additions & 1 deletion bar/inc/flowerAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ stSortedSet *makeFlowerAlignment3(StateMachine *sM, Flower *flower, stList *list

/*
* Returns an end, if exists, that has cap involved in every adjacency, else returns null.
* In case of tie, choose the end that would align forward along refEventName
* If the dominant end is still flipped along the reference, set outIsFlipped (if it isn't NULL) to true
*/
End *getDominantEnd(Flower *flower);
End *getDominantEnd(Flower *flower, Name refEventName, bool* outIsFlipped);

/*
* Ascertain which ends should be aligned separately.
Expand Down
4 changes: 3 additions & 1 deletion bar/inc/poaBarAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,15 @@ char *get_adjacency_string(Cap *cap, int *length, bool return_string);
* @param mask_filter Trim input sequences if encountering this many consecutive soft of hard masked bases (0 = disabled)
* @param poa_band_constant abpoa "b" parameter, where adaptive band is b+f*<length> (b < 0 = disabled)
* @param poa_band_fraction abpoa "f" parameter, where adaptive band is b+f*<length> (b < 0 = disabled)
* @param forward_event_name try to orient (via getDominantEnd) alignments so they are forward wrt to this even (use NULL_NAME to disable)
* Returns a list of AlignmentBlock ojects
*/
stList *make_flower_alignment_poa(Flower *flower,
int64_t max_seq_length,
int64_t window_size,
int64_t mask_filter,
abpoa_para_t * poa_parameters);
abpoa_para_t * poa_parameters,
Name forward_event_name);

/**
* Create a pinch iterator for a list of alignment blocks.
Expand Down
4 changes: 2 additions & 2 deletions bar/tests/poaBarTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ void test_make_flower_alignment_poa(CuTest *testCase) {
}
flower_destructEndIterator(endIterator);

stList *alignment_blocks = make_flower_alignment_poa(flower, 2, 1000000, 5, abpt);
stList *alignment_blocks = make_flower_alignment_poa(flower, 2, 1000000, 5, abpt, NULL_NAME);

for(int64_t i=0; i<stList_length(alignment_blocks); i++) {
AlignmentBlock *b = stList_get(alignment_blocks, i);
Expand All @@ -233,7 +233,7 @@ void test_alignment_block_iterator(CuTest *testCase) {
abpt->wf = 0.01;
abpoa_post_set_para(abpt);

stList *alignment_blocks = make_flower_alignment_poa(flower, 10000, 1000000, 5, abpt);
stList *alignment_blocks = make_flower_alignment_poa(flower, 10000, 1000000, 5, abpt, NULL_NAME);

abpoa_free_para(abpt);
#ifdef stderr_logging
Expand Down
3 changes: 3 additions & 0 deletions src/cactus/setup/cactus_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,9 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None):
barNode.attrib["minimumBlockDegree"] = "1"
# turn off POA seeding
poaNode.attrib["partialOrderAlignmentDisableSeeding"] = "1"
if options.reference:
# activate left-aligning for the reference inside BAR
poaNode.attrib["referenceEventName"] = options.reference[0]

# import the PAF alignments
paf_id = toil.importFile(makeURL(options.pafFile))
Expand Down