From 2e2f9799a616c404694f03b0469aaac6869023fa Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 6 Sep 2024 15:31:36 -0700
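Subject: [PATCH] Get even more tests to pass.

- SingleR(): treat a data.frame 'ref' as a single reference by wrapping it
  in a list, rather than looping over it as if it were a list of references.
- combineRecomputedResults(): 'check.missing' is now deprecated and ignored;
  its default becomes FALSE and FALSE is always passed to .to_clean_matrix().
  Document this, add 'warn.lost' to the Rd usage and move its entry to follow
  the argument order.
- Tests: sort the sampled row names in the multi-reference SingleR test so
  that tied genes are handled consistently; drop the classifySingleR
  missing-values test; update the expected error message for a zero-row
  'test'; simplify the combineRecomputedResults missing-genes test so that it
  expects the "available in each reference" warning.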

---
 R/SingleR.R                      |  2 +-
 R/combineRecomputedResults.R     |  5 +++--
 man/combineRecomputedResults.Rd  |  9 +++++----
 tests/testthat/test-SingleR.R    |  9 +++++----
 tests/testthat/test-classify.R   | 14 +-------------
 tests/testthat/test-recomputed.R | 29 ++++++++---------------------
 6 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/R/SingleR.R b/R/SingleR.R
index 6ed93fc..2d33145 100644
--- a/R/SingleR.R
+++ b/R/SingleR.R
@@ -108,7 +108,7 @@ SingleR <- function(
     test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)
 
     tmp.ref <- ref
-    if (!is.list(tmp.ref)) {
+    if (!is.list(tmp.ref) || is.data.frame(tmp.ref)) {
         tmp.ref <- list(ref)
     }
     for (rr in tmp.ref) {
diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R
index f783bb3..f1f1b69 100644
--- a/R/combineRecomputedResults.R
+++ b/R/combineRecomputedResults.R
@@ -6,6 +6,7 @@
 #'
 #' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.
 #' @inheritParams SingleR
+#' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.
 #' @param trained A list of \linkS4class{List}s containing the trained outputs of multiple references,
 #' equivalent to either (i) the output of \code{\link{trainSingleR}} on multiple references with \code{recompute=TRUE},
 #' or (ii) running \code{trainSingleR} on each reference separately and manually making a list of the trained outputs.
@@ -107,13 +108,13 @@ combineRecomputedResults <- function(
     trained, 
     quantile=0.8, 
     assay.type.test="logcounts", 
-    check.missing=TRUE, 
+    check.missing=FALSE,
     warn.lost=TRUE,
     allow.lost=FALSE, 
     num.threads = bpnworkers(BPPARAM),
     BPPARAM=SerialParam())
 {
-    test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM)
+    test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=FALSE, msg="test", BPPARAM=BPPARAM)
 
     # Applying the sanity checks.
     stopifnot(length(results) == length(trained))
diff --git a/man/combineRecomputedResults.Rd b/man/combineRecomputedResults.Rd
index 9ff4a95..8c04aed 100644
--- a/man/combineRecomputedResults.Rd
+++ b/man/combineRecomputedResults.Rd
@@ -10,7 +10,8 @@ combineRecomputedResults(
   trained,
   quantile = 0.8,
   assay.type.test = "logcounts",
-  check.missing = TRUE,
+  check.missing = FALSE,
+  warn.lost = TRUE,
   allow.lost = FALSE,
   num.threads = bpnworkers(BPPARAM),
   BPPARAM = SerialParam()
@@ -31,7 +32,9 @@ or (ii) running \code{trainSingleR} on each reference separately and manually ma
 \item{assay.type.test}{An integer scalar or string specifying the assay of \code{test} containing the relevant expression matrix,
 if \code{test} is a \linkS4class{SummarizedExperiment} object.}
 
-\item{check.missing}{Logical scalar indicating whether rows should be checked for missing values (and if found, removed).}
+\item{check.missing}{Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.}
+
+\item{warn.lost}{Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.}
 
 \item{allow.lost}{Deprecated.}
 
@@ -39,8 +42,6 @@ if \code{test} is a \linkS4class{SummarizedExperiment} object.}
 
 \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed in other steps,
 see \code{?\link{trainSingleR}} and \code{?\link{classifySingleR}} for more details.}
-
-\item{warn.lost}{Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.}
 }
 \value{
 A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row).
diff --git a/tests/testthat/test-SingleR.R b/tests/testthat/test-SingleR.R
index 5b0b06a..37f6e43 100644
--- a/tests/testthat/test-SingleR.R
+++ b/tests/testthat/test-SingleR.R
@@ -70,10 +70,11 @@ test_that("SingleR handles DelayedArray inputs", {
 })
 
 test_that("SingleR works with multiple references", {
-    # Handles mismatching row names.
-    chosen0 <- sample(rownames(training), 900)
-    chosen1 <- sample(rownames(training), 900)
-    chosen2 <- sample(rownames(training), 900)
+    # Handles mismatching row names. Note that the sorting is necessary
+    # to ensure that tied genes are handled in a consistent way.
+    chosen0 <- sort(sample(rownames(training), 900))
+    chosen1 <- sort(sample(rownames(training), 900))
+    chosen2 <- sort(sample(rownames(training), 900))
 
     # Works with recomputation.
     out <- SingleR(test[chosen0,], list(training[chosen1,], training[chosen2,]), 
diff --git a/tests/testthat/test-classify.R b/tests/testthat/test-classify.R
index 4ad16e0..05d0790 100644
--- a/tests/testthat/test-classify.R
+++ b/tests/testthat/test-classify.R
@@ -79,18 +79,6 @@ test_that("classifySingleR behaves with no-variance cells", {
     expect_identical(out$labels[-(1:10)], ref$labels[-(1:10)])
 })
 
-test_that("classifySingleR behaves with missing values", {
-    # Can't just set the first entry to NA, as we need to ensure 
-    # that the test set contains a superset of genes in the training set.
-    sce <- BiocGenerics::rbind(test[1,], test)
-    logcounts(sce)[1,1] <- NA
-
-    Q <- 0.8
-    out <- classifySingleR(sce, trained, fine.tune=FALSE, quantile=Q)
-    ref <- classifySingleR(test, trained, fine.tune=FALSE, quantile=Q)
-    expect_identical(out, ref)
-})
-
 test_that("classifySingleR works with multiple references", {
     training1 <- training2 <- training
     training1 <- training1[sample(nrow(training1)),]
@@ -108,5 +96,5 @@ test_that("classifySingleR behaves with silly inputs", {
     out <- classifySingleR(test[,0], trained, fine.tune=FALSE)
     expect_identical(nrow(out$scores), 0L)
     expect_identical(length(out$labels), 0L)
-    expect_error(classifySingleR(test[0,], trained, fine.tune=FALSE), "does not contain")
+    expect_error(classifySingleR(test[0,], trained, fine.tune=FALSE), "expected 'rownames(test)' to be the same", fixed=TRUE)
 })
diff --git a/tests/testthat/test-recomputed.R b/tests/testthat/test-recomputed.R
index 18f7fa6..be75d4c 100644
--- a/tests/testthat/test-recomputed.R
+++ b/tests/testthat/test-recomputed.R
@@ -109,32 +109,19 @@ test_that("combineRecomputedResults handles mismatches to rows and cells", {
 })
 
 test_that("combineRecomputedResults emits warnings when missing genes are present", {
+    half <- nrow(test) / 2
+
     # Spiking in some missing genes.
-    ref1b <- ref1[c(1, seq_len(nrow(ref1))),]
-    rownames(ref1b)[1] <- "BLAH"
-    markers1 <- train1$markers$full
-    markers1$A$B <- c(markers1$A$B, "BLAH")
-    train1b <- trainSingleR(ref1b, labels=ref1$label, genes=markers1, test.genes=rownames(test))
-
-    ref2b <- ref2[c(1, seq_len(nrow(ref2))),]
-    rownames(ref2b)[1] <- "WHEE"
-    markers2 <- train2$markers$full
-    markers2$A$B <- c(markers2$a$b, "WHEE")
-    train2b <- trainSingleR(ref2b, labels=ref2$label, genes=markers2, test.genes=rownames(test))
-
-    expect_error(out <- combineRecomputedResults(
-        results=list(pred1, pred2), 
-        test=test,
-        trained=list(train1b, train2b)), "should be present")
+    ref1b <- ref1[seq_len(half),,drop=FALSE]
+    train1b <- trainSingleR(ref1b, labels=ref1$label, test.genes=rownames(test))
 
-    test2 <- test[c(1,seq_len(nrow(test)),1),]
-    rownames(test2)[1] <- "WHEE"
-    rownames(test2)[length(rownames(test2))] <- "BLAH"
+    ref2b <- ref2[half + seq_len(half),]
+    train2b <- trainSingleR(ref2b, labels=ref2$label, test.genes=rownames(test))
 
     expect_warning(out <- combineRecomputedResults(
         results=list(pred1, pred2), 
-        test=test2,
-        trained=list(train1b, train2b)), "differ in the universe")
+        test=test,
+        trained=list(train1b, train2b)), "available in each reference")
 })
 
 test_that("combineRecomputedResults is invariant to ordering", {