diff --git a/R/SingleR.R b/R/SingleR.R index 6ed93fc..2d33145 100644 --- a/R/SingleR.R +++ b/R/SingleR.R @@ -108,7 +108,7 @@ SingleR <- function( test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM) tmp.ref <- ref - if (!is.list(tmp.ref)) { + if (!is.list(tmp.ref) || is.data.frame(tmp.ref)) { tmp.ref <- list(ref) } for (rr in tmp.ref) { diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R index f783bb3..f1f1b69 100644 --- a/R/combineRecomputedResults.R +++ b/R/combineRecomputedResults.R @@ -6,6 +6,7 @@ #' #' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately. #' @inheritParams SingleR +#' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}. #' @param trained A list of \linkS4class{List}s containing the trained outputs of multiple references, #' equivalent to either (i) the output of \code{\link{trainSingleR}} on multiple references with \code{recompute=TRUE}, #' or (ii) running \code{trainSingleR} on each reference separately and manually making a list of the trained outputs. @@ -107,13 +108,13 @@ combineRecomputedResults <- function( trained, quantile=0.8, assay.type.test="logcounts", - check.missing=TRUE, + check.missing=FALSE, warn.lost=TRUE, allow.lost=FALSE, num.threads = bpnworkers(BPPARAM), BPPARAM=SerialParam()) { - test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM) + test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=FALSE, msg="test", BPPARAM=BPPARAM) # Applying the sanity checks. stopifnot(length(results) == length(trained)) diff --git a/man/combineRecomputedResults.Rd b/man/combineRecomputedResults.Rd index 9ff4a95..8c04aed 100644 --- a/man/combineRecomputedResults.Rd +++ b/man/combineRecomputedResults.Rd @@ -10,7 +10,8 @@ combineRecomputedResults( trained, quantile = 0.8, assay.type.test = "logcounts", - check.missing = TRUE, + check.missing = FALSE, + warn.lost = TRUE, allow.lost = FALSE, num.threads = bpnworkers(BPPARAM), BPPARAM = SerialParam() @@ -31,7 +32,9 @@ or (ii) running \code{trainSingleR} on each reference separately and manually ma \item{assay.type.test}{An integer scalar or string specifying the assay of \code{test} containing the relevant expression matrix, if \code{test} is a \linkS4class{SummarizedExperiment} object.} -\item{check.missing}{Logical scalar indicating whether rows should be checked for missing values (and if found, removed).} +\item{check.missing}{Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.} + +\item{warn.lost}{Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.} \item{allow.lost}{Deprecated.} @@ -39,8 +42,6 @@ if \code{test} is a \linkS4class{SummarizedExperiment} object.} \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed in other steps, see \code{?\link{trainSingleR}} and \code{?\link{classifySingleR}} for more details.} - -\item{warn.lost}{Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.} } \value{ A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row). diff --git a/tests/testthat/test-SingleR.R b/tests/testthat/test-SingleR.R index 5b0b06a..37f6e43 100644 --- a/tests/testthat/test-SingleR.R +++ b/tests/testthat/test-SingleR.R @@ -70,10 +70,11 @@ test_that("SingleR handles DelayedArray inputs", { }) test_that("SingleR works with multiple references", { - # Handles mismatching row names. - chosen0 <- sample(rownames(training), 900) - chosen1 <- sample(rownames(training), 900) - chosen2 <- sample(rownames(training), 900) + # Handles mismatching row names. Note that the sorting is necessary + # to ensure that tied genes are handled in a consistent way. + chosen0 <- sort(sample(rownames(training), 900)) + chosen1 <- sort(sample(rownames(training), 900)) + chosen2 <- sort(sample(rownames(training), 900)) # Works with recomputation. out <- SingleR(test[chosen0,], list(training[chosen1,], training[chosen2,]), diff --git a/tests/testthat/test-classify.R b/tests/testthat/test-classify.R index 4ad16e0..05d0790 100644 --- a/tests/testthat/test-classify.R +++ b/tests/testthat/test-classify.R @@ -79,18 +79,6 @@ test_that("classifySingleR behaves with no-variance cells", { expect_identical(out$labels[-(1:10)], ref$labels[-(1:10)]) }) -test_that("classifySingleR behaves with missing values", { - # Can't just set the first entry to NA, as we need to ensure - # that the test set contains a superset of genes in the training set. - sce <- BiocGenerics::rbind(test[1,], test) - logcounts(sce)[1,1] <- NA - - Q <- 0.8 - out <- classifySingleR(sce, trained, fine.tune=FALSE, quantile=Q) - ref <- classifySingleR(test, trained, fine.tune=FALSE, quantile=Q) - expect_identical(out, ref) -}) - test_that("classifySingleR works with multiple references", { training1 <- training2 <- training training1 <- training1[sample(nrow(training1)),] @@ -108,5 +96,5 @@ test_that("classifySingleR behaves with silly inputs", { out <- classifySingleR(test[,0], trained, fine.tune=FALSE) expect_identical(nrow(out$scores), 0L) expect_identical(length(out$labels), 0L) - expect_error(classifySingleR(test[0,], trained, fine.tune=FALSE), "does not contain") + expect_error(classifySingleR(test[0,], trained, fine.tune=FALSE), "expected 'rownames(test)' to be the same", fixed=TRUE) }) diff --git a/tests/testthat/test-recomputed.R b/tests/testthat/test-recomputed.R index 18f7fa6..be75d4c 100644 --- a/tests/testthat/test-recomputed.R +++ b/tests/testthat/test-recomputed.R @@ -109,32 +109,19 @@ test_that("combineRecomputedResults handles mismatches to rows and cells", { }) test_that("combineRecomputedResults emits warnings when missing genes are present", { + half <- nrow(test) / 2 + # Spiking in some missing genes. - ref1b <- ref1[c(1, seq_len(nrow(ref1))),] - rownames(ref1b)[1] <- "BLAH" - markers1 <- train1$markers$full - markers1$A$B <- c(markers1$A$B, "BLAH") - train1b <- trainSingleR(ref1b, labels=ref1$label, genes=markers1, test.genes=rownames(test)) - - ref2b <- ref2[c(1, seq_len(nrow(ref2))),] - rownames(ref2b)[1] <- "WHEE" - markers2 <- train2$markers$full - markers2$A$B <- c(markers2$a$b, "WHEE") - train2b <- trainSingleR(ref2b, labels=ref2$label, genes=markers2, test.genes=rownames(test)) - - expect_error(out <- combineRecomputedResults( - results=list(pred1, pred2), - test=test, - trained=list(train1b, train2b)), "should be present") + ref1b <- ref1[seq_len(half),,drop=FALSE] + train1b <- trainSingleR(ref1b, labels=ref1$label, test.genes=rownames(test)) - test2 <- test[c(1,seq_len(nrow(test)),1),] - rownames(test2)[1] <- "WHEE" - rownames(test2)[length(rownames(test2))] <- "BLAH" + ref2b <- ref2[half + seq_len(half),] + train2b <- trainSingleR(ref2b, labels=ref2$label, test.genes=rownames(test)) expect_warning(out <- combineRecomputedResults( results=list(pred1, pred2), - test=test2, - trained=list(train1b, train2b)), "differ in the universe") + test=test, + trained=list(train1b, train2b)), "available in each reference") }) test_that("combineRecomputedResults is invariant to ordering", {