Skip to content

Commit

Permalink
try to orient bar jobs on reference to left-align downstream
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Dec 12, 2023
1 parent 3bbad2a commit d6181fa
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 14 deletions.
33 changes: 33 additions & 0 deletions api/impl/cactus_params_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,36 @@ double cactusParams_get_float(CactusParams *p, int num, ...) {
va_end(args);
return j;
}

static bool cactusParams_has_attr2(CactusParams *p, int num, va_list *args) {
va_list args2;
va_copy(args2, *args); // to use the variable args we must copy it - see https://wiki.sei.cmu.edu/confluence/display/c/MSC39-C.+Do+not+call+va_arg%28%29+on+a+va_list+that+has+an+indeterminate+value
xmlNodePtr c = get_descendant_node(p->cur, num-1, &args2);

if(c == NULL) {
return false;
}
const char *attribute_name = NULL;
for (int64_t i = 0; i < num; i++) { // Loop to discard the earlier strings in the input
attribute_name = va_arg(args2, char *);
}
char *v = (char *)xmlGetProp(c, (const xmlChar *)attribute_name);

if(v == NULL) {
return false;
}

va_end(args2);

return true;

}

bool cactusParams_has_attr(CactusParams *p, int num, ...) {
va_list args;
bool ret;
va_start(args, num);
ret = cactusParams_has_attr2(p, num, &args);
va_end(args);
return ret;
}
7 changes: 6 additions & 1 deletion api/inc/cactus_params_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void cactusParams_set_root(CactusParams *p, int num, ...);
/*
* Get a string parameter.
*/
char *cactusParams_get_string(CactusParams *p, int, ...);
char *cactusParams_get_string(CactusParams *p, int num, ...);

/*
* Get an integer parameter.
Expand All @@ -56,4 +56,9 @@ double cactusParams_get_float(CactusParams *p, int, ...);
*/
int64_t *cactusParams_get_ints(CactusParams *p, int64_t *length, int, ...);

/*
* Check if attribute exists
*/
bool cactusParams_has_attr(CactusParams *p, int, ...);

#endif /* ST_CACTUS_PARAMS_PARSER_H_ */
11 changes: 10 additions & 1 deletion bar/impl/bar.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ void bar(stList *flowers, CactusParams *params, CactusDisk *cactusDisk, stList *
fa->minimumDegree = cactusParams_get_int(params, 2, "bar", "minimumBlockDegree");
fa->minimumNumberOfSpecies = cactusParams_get_int(params, 2, "bar", "minimumNumberOfSpecies");
fa->flower = flower;
// This option uses orients bar alignments so they are forward on the given event (to left-align in output)
Name forwardEventName = NULL_NAME;
if (cactusParams_has_attr(params, 3, "bar", "poa", "referenceEventName")) {
char *forwardEventHeader = cactusParams_get_string(params, 3, "bar", "poa", "referenceEventName");
Event *forwardEvent = eventTree_getEventByHeader(flower_getEventTree(flower), forwardEventHeader);
assert(forwardEvent != NULL);
forwardEventName = event_getName(forwardEvent);
free(forwardEventHeader);
}

void *alignments;
if (usePoa) {
Expand All @@ -101,7 +110,7 @@ void bar(stList *flowers, CactusParams *params, CactusDisk *cactusDisk, stList *
*
* It does not use any precomputed alignments, if they are provided they will be ignored
*/
alignments = make_flower_alignment_poa(flower, maximumLength, poaWindow, maskFilter, poaParameters);
alignments = make_flower_alignment_poa(flower, maximumLength, poaWindow, maskFilter, poaParameters, forwardEventName);
st_logDebug("Created the poa alignments: %" PRIi64 " poa alignment blocks for flower\n", stList_length(alignments));
} else {
alignments = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength,
Expand Down
40 changes: 34 additions & 6 deletions bar/impl/flowerAligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,19 +459,47 @@ int64_t getMaxAdjacencyLength(Flower *flower) {
return maxAdjacencyLength;
}

End *getDominantEnd(Flower *flower) {
End *getDominantEnd(Flower *flower, Name forward_event_name) {
//return NULL;
assert(flower_getGroupNumber(flower) <= 1);
Flower_EndIterator *endIt = flower_getEndIterator(flower);
End *end;
int64_t maxInstanceNumber = 0;
// Get the end with the largest number of caps
End *dominantEnd = NULL;
// break ties with the largest number of forward sequences (prioritizing the forward_event_name)
int64_t dominantForwardEvent = 0;
int64_t dominantForward = 0;
while ((end = flower_getNextEnd(endIt)) != NULL) {
if (end_getInstanceNumber(end) > maxInstanceNumber) {
maxInstanceNumber = end_getInstanceNumber(end);
dominantEnd = end;
}
int64_t endInstanceNumber = end_getInstanceNumber(end);
if (endInstanceNumber >= maxInstanceNumber) {
// compute the stranding information
End_InstanceIterator *endIt = end_getInstanceIterator(end);
Cap *cap;
int64_t forwardEventCount = 0;
int64_t forwardCount = 0;
while ((cap = end_getNext(endIt)) != NULL) {
break;
Cap *adj_cap = cap_getAdjacency(cap);
if (adj_cap != NULL && cap_getCoordinate(cap) < cap_getCoordinate(adj_cap)) {
++forwardCount;
Event *event = cap_getEvent(cap);
if (forward_event_name != NULL_NAME && event != NULL && event_getName(event) == forward_event_name) {
++forwardEventCount;
}
}
}
end_destructInstanceIterator(endIt);
if (endInstanceNumber > maxInstanceNumber ||
(endInstanceNumber == maxInstanceNumber &&
(forwardEventCount > dominantForwardEvent ||
(forwardEventCount == dominantForwardEvent && forwardCount > dominantForward)))) {
maxInstanceNumber = endInstanceNumber;
dominantEnd = end;
dominantForwardEvent = forwardEventCount;
dominantForward = forwardCount;
}
}
}
flower_destructEndIterator(endIt);
if (dominantEnd == NULL) { // This will only be true if there are no ends or only ends with no caps
Expand Down Expand Up @@ -504,7 +532,7 @@ static stSortedSet *getEndsToAlign(Flower *flower, int64_t maxSequenceLength) {
* Gets a set of the ends that we need to construct actual alignments for.
*/
stSortedSet *endsToAlign = stSortedSet_construct();
End *dominantEnd = getDominantEnd(flower); //an end to which all adjacencies are incident
End *dominantEnd = getDominantEnd(flower, NULL_NAME); //an end to which all adjacencies are incident
if (dominantEnd != NULL && getMaxAdjacencyLength(flower) <= 2 * maxSequenceLength) {
stSortedSet_insert(endsToAlign, dominantEnd);
} else {
Expand Down
4 changes: 2 additions & 2 deletions bar/impl/poaBarAligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -1094,8 +1094,8 @@ int64_t getMaxSequenceLength(End *end) {
}

stList *make_flower_alignment_poa(Flower *flower, int64_t max_seq_length, int64_t window_size, int64_t mask_filter,
abpoa_para_t * poa_parameters) {
End *dominantEnd = getDominantEnd(flower);
abpoa_para_t * poa_parameters, Name forward_event_name) {
End *dominantEnd = getDominantEnd(flower, forward_event_name);
int64_t seq_no = dominantEnd != NULL ? end_getInstanceNumber(dominantEnd) : -1;
if(dominantEnd != NULL && getMaxSequenceLength(dominantEnd) < max_seq_length) {
/*
Expand Down
3 changes: 2 additions & 1 deletion bar/inc/flowerAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ stSortedSet *makeFlowerAlignment3(StateMachine *sM, Flower *flower, stList *list

/*
* Returns an end, if exists, that has cap involved in every adjacency, else returns null.
* In case of tie, choose the end that would align forward along forward_event_name
*/
End *getDominantEnd(Flower *flower);
End *getDominantEnd(Flower *flower, Name forward_event_name);

/*
* Ascertain which ends should be aligned separately.
Expand Down
4 changes: 3 additions & 1 deletion bar/inc/poaBarAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,15 @@ char *get_adjacency_string(Cap *cap, int *length, bool return_string);
* @param mask_filter Trim input sequences if encountering this many consecutive soft of hard masked bases (0 = disabled)
* @param poa_band_constant abpoa "b" parameter, where adaptive band is b+f*<length> (b < 0 = disabled)
* @param poa_band_fraction abpoa "f" parameter, where adaptive band is b+f*<length> (b < 0 = disabled)
* @param forward_event_name try to orient (via getDominantEnd) alignments so they are forward wrt to this even (use NULL_NAME to disable)
* Returns a list of AlignmentBlock ojects
*/
stList *make_flower_alignment_poa(Flower *flower,
int64_t max_seq_length,
int64_t window_size,
int64_t mask_filter,
abpoa_para_t * poa_parameters);
abpoa_para_t * poa_parameters,
Name forward_event_name);

/**
* Create a pinch iterator for a list of alignment blocks.
Expand Down
4 changes: 2 additions & 2 deletions bar/tests/poaBarTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ void test_make_flower_alignment_poa(CuTest *testCase) {
}
flower_destructEndIterator(endIterator);

stList *alignment_blocks = make_flower_alignment_poa(flower, 2, 1000000, 5, abpt);
stList *alignment_blocks = make_flower_alignment_poa(flower, 2, 1000000, 5, abpt, NULL_NAME);

for(int64_t i=0; i<stList_length(alignment_blocks); i++) {
AlignmentBlock *b = stList_get(alignment_blocks, i);
Expand All @@ -233,7 +233,7 @@ void test_alignment_block_iterator(CuTest *testCase) {
abpt->wf = 0.01;
abpoa_post_set_para(abpt);

stList *alignment_blocks = make_flower_alignment_poa(flower, 10000, 1000000, 5, abpt);
stList *alignment_blocks = make_flower_alignment_poa(flower, 10000, 1000000, 5, abpt, NULL_NAME);

abpoa_free_para(abpt);
#ifdef stderr_logging
Expand Down
3 changes: 3 additions & 0 deletions src/cactus/setup/cactus_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,9 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None):
barNode.attrib["minimumBlockDegree"] = "1"
# turn off POA seeding
poaNode.attrib["partialOrderAlignmentDisableSeeding"] = "1"
if options.reference:
# activate left-aligning for the reference inside BAR
poaNode.attrib["referenceEventName"] = options.reference[0]

# import the PAF alignments
paf_id = toil.importFile(makeURL(options.pafFile))
Expand Down

0 comments on commit d6181fa

Please sign in to comment.