Inch a bit closer.

SingleR-inc · Sep 6, 2024 · 3faaf82 · 3faaf82
1 parent 81c860e
commit 3faaf82
Show file tree

Hide file tree

Showing 5 changed files with 51 additions and 61 deletions.
diff --git a/R/SingleR.R b/R/SingleR.R
@@ -102,34 +102,10 @@ SingleR <- function(
         bpstart(BPPARAM)
         on.exit(bpstop(BPPARAM))
     }
-    test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)
-
-    # Converting to a common list format for ease of data munging.
-    if (single.ref <- !.is_list(ref)) {
-        ref <- list(ref)
-    }
 
-    ref <- lapply(ref, FUN=.to_clean_matrix, assay.type=assay.type.ref, 
-        check.missing=check.missing, msg="ref", BPPARAM=BPPARAM)
-    refnames <- Reduce(intersect, lapply(ref, rownames))
-
-    keep <- intersect(rownames(test), refnames)
-    if (length(keep) == 0) {
-        stop("no common genes between 'test' and 'ref'")
-    }
-    if (!identical(keep, rownames(test))) {
-        test <- test[keep,]
-    }
-    for (i in seq_along(ref)) {
-        if (!identical(keep, rownames(ref[[i]]))) {
-            ref[[i]] <- ref[[i]][keep,,drop=FALSE]
-        }
-    }
-
-    # Converting back.
-    if (single.ref) {
-        ref <- ref[[1]]
-    }
+    # We have to clean it at the start to remove NAs before we do the build,
+    # otherwise 'test.genes' won't match up to the filtered 'test'.
+    test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)
 
     trained <- trainSingleR(
         ref, 
@@ -143,7 +119,8 @@ SingleR <- function(
         aggr.args = aggr.args, 
         recompute=recompute,
         restrict = restrict, 
-        check.missing=FALSE, 
+        test.genes=rownames(test),
+        check.missing=check.missing, 
         BNPARAM=BNPARAM, 
         num.threads = num.threads, 
         BPPARAM=BPPARAM

diff --git a/R/classifySingleR.R b/R/classifySingleR.R
@@ -111,8 +111,9 @@ classifySingleR <- function(
 
     results <- vector("list", length(trained))
     for (l in seq_along(results)) {
-        trained[[l]] <- .classify_internals(
+        results[[l]] <- .classify_internals(
             test=test, 
+            trained=trained[[l]],
             quantile=quantile, 
             fine.tune=fine.tune, 
             tune.thresh=tune.thresh, 
@@ -134,16 +135,19 @@ classifySingleR <- function(
     } 
 }
 
-#' @importFrom S4Vectors DataFrame metadata metadata<- I
-.classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
+.check_test_genes <- function(test, trained) {
     if (!is.null(trained$options$test.genes)) {
         if (!identical(trained$options$test.genes, rownames(test))) {
-            stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trainSingleR'")
+            stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trained'")
         }
     } else if (nrow(trained$ref) != nrow(test)) {
         stop("expected 'test' to have the same number of rows as the reference dataset")
     }
+}
 
+#' @importFrom S4Vectors DataFrame metadata metadata<- I
+.classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
+    .check_test_genes(test, trained)
     trained <- rebuildIndex(trained, num.threads = num.threads)
 
     parsed <- initializeCpp(test)

diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R
@@ -100,6 +100,7 @@
 #'
 #' @export
 #' @importFrom S4Vectors DataFrame metadata<-
+#' @importFrom beachmat initializeCpp
 combineRecomputedResults <- function(
     results, 
     test, 
@@ -112,54 +113,59 @@ combineRecomputedResults <- function(
     num.threads = bpnworkers(BPPARAM),
     BPPARAM=SerialParam())
 {
-    all.names <- c(list(colnames(test)), lapply(results, rownames))
-    if (length(unique(all.names)) != 1) {
-        stop("cell/cluster names in 'results' are not identical")
-    }
-    all.nrow <- c(ncol(test), vapply(results, nrow, 0L))
-    if (length(unique(all.nrow)) != 1) {
-        stop("numbers of cells/clusters in 'results' are not identical")
-    }
+    test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM)
 
-    # Checking the marker consistency.
-    all.refnames <- lapply(trained, function(x) rownames(x$ref))
-    intersected <- Reduce(intersect, all.refnames)
-    for (i in seq_along(trained)) {
-        if (!all(trained[[i]]$markers$unique %in% rownames(test))) {
-            stop("all markers stored in 'results' should be present in 'test'")
-        } else if (warn.lost && !all(trained[[i]]$markers$unique %in% intersected)) {
-            warning("entries of 'trained' differ in the universe of available markers")
+    # Applying the sanity checks.
+    stopifnot(length(results) == length(trained))
+    for (i in seq_along(results)) {
+        curres <- results[[i]]
+        if (ncol(test) != nrow(curres)) {
+            stop("numbers of cells/clusters in 'results' are not identical")
+        }
+        if (!identical(rownames(curres), colnames(test))) {
+            stop("cell/cluster names in 'results' are not identical")
         }
+
+        curtrain <- trained[[i]]
+        if (!all(curres$labels %in% curtrain$labels$unique)) {
+            stop("not all labels in 'results' are present in 'trained'")
+        }
+        .check_test_genes(test, curtrain)
     }
-    
+
     # Applying the integration.
+    all.refnames <- lapply(trained, function(x) rownames(x$ref))
     universe <- Reduce(union, c(list(rownames(test)), all.refnames))
-    ibuilt <- integrate_build(
-        match(rownames(test), universe) - 1L,
-        lapply(trained, function(x) initializeCpp(x$ref)),
-        lapply(trained, function(x) match(rownames(x$ref), universe) - 1L), 
-        lapply(trained, function(x) match(x$labels$full, x$labels$unique) - 1L),
-        lapply(trained, function(x) x$built),
+    ibuilt <- train_integrated(
+        test_features=match(rownames(test), universe) - 1L,
+        references=lapply(trained, function(x) initializeCpp(x$ref)),
+        ref_ids=lapply(all.refnames, function(x) match(x, universe) - 1L), 
+        labels=lapply(trained, function(x) match(x$labels$full, x$labels$unique) - 1L),
+        prebuilt=lapply(trained, function(x) rebuildIndex(x)$built),
         nthreads = num.threads
     )
 
-    test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM)
     collated <- vector("list", length(trained))
     for (i in seq_along(collated)) {
         collated[[i]] <- match(results[[i]]$labels, trained[[i]]$labels$unique) - 1L
     }
 
     parsed <- initializeCpp(test)
-    irun <- integrate_run(parsed, collated, ibuilt, quantile = quantile, nthreads = num.threads) 
-    scores <- irun$scores
+    irun <- classify_integrated(
+        test=parsed,
+        results=collated,
+        integrated_build=ibuilt,
+        quantile=quantile,
+        nthreads=num.threads
+    ) 
 
     # Organizing the outputs.
     base.scores <- vector("list", length(results))
     for (r in seq_along(base.scores)) {
         mat <- results[[r]]$scores   
         mat[] <- NA_real_
         idx <- cbind(seq_len(nrow(mat)), collated[[r]] + 1L)
-        mat[idx] <- scores[,r]
+        mat[idx] <- irun$scores[,r]
         base.scores[[r]] <- mat
     }
 

diff --git a/R/trainSingleR.R b/R/trainSingleR.R
@@ -293,6 +293,9 @@ trainSingleR <- function(
     if (!is.null(test.genes)) {
         ref <- DelayedArray(ref)[rownames(ref) %in% test.genes,,drop=FALSE]
     }
+    if (nrow(ref) == 0L) {
+        stop("no genes available for marker detection in the reference dataset")
+    }
 
     if (.is_list(genes)) {
         is.char <- vapply(genes, is.character, TRUE)

diff --git a/R/utils.R b/R/utils.R
@@ -24,10 +24,10 @@
         setAutoBPPARAM(BPPARAM)
         on.exit(setAutoBPPARAM(old))
 
-        x <- DelayedArray(x)
+        y <- DelayedArray(x)
         discard <- rowAnyNAs(x)
         if (any(discard)) {
-            x <- x[!discard,,drop=FALSE]
+            x <- y[!discard,,drop=FALSE]
         }
     }