Skip to content

Commit

Permalink
feat: output umi grouping metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
znorgaard committed Dec 16, 2024
1 parent 1c96bd7 commit 27b140d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
29 changes: 29 additions & 0 deletions src/main/scala/com/fulcrumgenomics/umi/GroupReadsByUmi.scala
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,20 @@ case class TagFamilySizeMetric(family_size: Int,
var fraction: Proportion = 0,
var fraction_gt_or_eq_family_size: Proportion = 0) extends Metric

/**
 * Metrics produced by `GroupReadsByUmi` to describe reads passed through UMI grouping: how many
 * were accepted for grouping, and how many were filtered out, broken down by reason.
 *
 * NOTE(review): `filterUmisTooShort` does not follow the `filtered*` naming of its sibling fields.
 * It is left as-is here because the field names are presumably written out as the metric file's
 * column headers - confirm before renaming.
 *
 * @param reads The number of reads accepted for grouping
 * @param filteredNonPf The number of non-PF reads filtered out
 * @param filteredPoorAlignment The number of templates discarded for poor alignment
 * @param filteredNsInUmi The number of templates discarded due to one or more Ns in the UMI
 * @param filterUmisTooShort The number of templates discarded due to a shorter than expected UMI
 */
case class UmiGroupingMetric(reads: Long,
filteredNonPf: Long,
filteredPoorAlignment: Long,
filteredNsInUmi: Long,
filterUmisTooShort: Long) extends Metric

/** The strategies implemented by [[GroupReadsByUmi]] to identify reads from the same source molecule.*/
sealed trait Strategy extends EnumEntry {
def newStrategy(edits: Int, threads: Int): UmiAssigner
Expand Down Expand Up @@ -568,6 +582,7 @@ class GroupReadsByUmi
(@arg(flag='i', doc="The input BAM file.") val input: PathToBam = Io.StdIn,
@arg(flag='o', doc="The output BAM file.") val output: PathToBam = Io.StdOut,
@arg(flag='f', doc="Optional output of tag family size counts.") val familySizeHistogram: Option[FilePath] = None,
@arg(flag='g', doc="Optional output of UMI grouping metrics.") val groupingMetrics: Option[FilePath] = None,
@arg(flag='t', doc="The tag containing the raw UMI.") val rawTag: String = ConsensusTags.UmiBases,
@arg(flag='T', doc="The output tag for UMI grouping.") val assignTag: String = ConsensusTags.MolecularId,
@arg(flag='d', doc="Turn on duplicate marking mode.") val markDuplicates: Boolean = false,
Expand Down Expand Up @@ -742,6 +757,20 @@ class GroupReadsByUmi
ms.tails.foreach { tail => tail.headOption.foreach(m => m.fraction_gt_or_eq_family_size = tail.map(_.fraction).sum) }
Metric.write(p, ms)
}

// Emit the UMI grouping metrics, but only when an output path was supplied
this.groupingMetrics.foreach { metricsPath =>
  // Named distinctly from the `groupingMetrics` constructor argument to avoid shadowing it
  val umiGroupingMetric = UmiGroupingMetric(
    reads                 = kept,
    filteredNonPf         = filteredNonPf,
    filteredPoorAlignment = filteredPoorAlignment,
    filteredNsInUmi       = filteredNsInUmi,
    filterUmisTooShort    = filterUmisTooShort
  )
  Metric.write(metricsPath, umiGroupingMetric)
}
}

private def logStats(): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ class GroupReadsByUmiTest extends UnitSpec with OptionValues with PrivateMethodT
val in = builder.toTempFile()
val out = Files.createTempFile("umi_grouped.", ".sam")
val hist = Files.createTempFile("umi_grouped.", ".histogram.txt")
val tool = new GroupReadsByUmi(input=in, output=out, familySizeHistogram=Some(hist), rawTag="RX", assignTag="MI", strategy=Strategy.Edit, edits=1, minMapQ=Some(30))
val metrics = Files.createTempFile("umi_grouped.", ".metrics.txt")
val tool = new GroupReadsByUmi(input=in, output=out, familySizeHistogram=Some(hist), groupingMetrics=Some(metrics), rawTag="RX", assignTag="MI", strategy=Strategy.Edit, edits=1, minMapQ=Some(30))
val logs = executeFgbioTool(tool)

val groups = readBamRecs(out).groupBy(_.name.charAt(0))
Expand All @@ -267,6 +268,8 @@ class GroupReadsByUmiTest extends UnitSpec with OptionValues with PrivateMethodT

hist.toFile.exists() shouldBe true

metrics.toFile.exists() shouldBe true

// Make sure that we skip sorting for TemplateCoordinate
val sortMessage = "Sorting the input to TemplateCoordinate order"
logs.exists(_.contains(sortMessage)) shouldBe (sortOrder != TemplateCoordinate)
Expand Down

0 comments on commit 27b140d

Please sign in to comment.