Skip to content

Commit

Permalink
Continue inching further.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Sep 6, 2024
1 parent b8bd376 commit d1cd66f
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 32 deletions.
16 changes: 15 additions & 1 deletion R/SingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,24 @@ SingleR <- function(
on.exit(bpstop(BPPARAM))
}

# We have to clean it at the start to remove NAs before we do the build,
# We have to do all this row-subsetting at the start before trainSingleR,
# otherwise 'test.genes' won't match up to the filtered 'test'.
test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)

tmp.ref <- ref
if (!is.list(tmp.ref)) {
tmp.ref <- list(ref)
}
for (rr in tmp.ref) {
keep <- rownames(test) %in% rownames(rr)
if (!all(keep)) {
test <- DelayedArray(test)[keep,,drop=FALSE]
}
}
if (nrow(test) == 0) {
stop("no common genes between 'test' and 'ref")
}

trained <- trainSingleR(
ref,
labels,
Expand Down
11 changes: 5 additions & 6 deletions R/classifySingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
#' @param tune.thresh A numeric scalar specifying the maximum difference from the maximum correlation to use in fine-tuning.
#' @param sd.thresh Deprecated and ignored.
#' @param assay.type Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \linkS4class{SummarizedExperiment}.
#' @param check.missing Logical scalar indicating whether rows should be checked for missing values.
#' If true and any missing values are found, the rows containing these values are silently removed.
#' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.
#' @param prune A logical scalar indicating whether label pruning should be performed.
#' @param num.threads Integer scalar specifying the number of threads to use for classification.
#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying the parallelization scheme to use for \code{NA} scanning, when \code{check.missing=TRUE}.
Expand Down Expand Up @@ -98,11 +97,11 @@ classifySingleR <- function(
sd.thresh=NULL,
prune=TRUE,
assay.type="logcounts",
check.missing=TRUE,
check.missing=FALSE,
num.threads = bpnworkers(BPPARAM),
BPPARAM=SerialParam())
{
test <- .to_clean_matrix(test, assay.type, check.missing, msg="test", BPPARAM=BPPARAM)
test <- .to_clean_matrix(test, assay.type, check.missing=FALSE, msg="test", BPPARAM=BPPARAM)

solo <- .is_solo(trained)
if (solo) {
Expand Down Expand Up @@ -140,8 +139,8 @@ classifySingleR <- function(
if (!identical(trained$options$test.genes, rownames(test))) {
stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trained'")
}
} else if (nrow(trained$ref) != nrow(test)) {
stop("expected 'test' to have the same number of rows as the reference dataset")
} else if (!identical(rownames(trained$ref), rownames(test))) {
stop("expected 'rownames(test)' to be the same as 'rownames(ref)' in 'trained'")
}
}

Expand Down
16 changes: 13 additions & 3 deletions R/combineRecomputedResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @param trained A list of \linkS4class{List}s containing the trained outputs of multiple references,
#' equivalent to either (i) the output of \code{\link{trainSingleR}} on multiple references with \code{recompute=TRUE},
#' or (ii) running \code{trainSingleR} on each reference separately and manually making a list of the trained outputs.
#' @param warn.lost Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are \dQuote{lost} in other references.
#' @param warn.lost Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.
#' @param quantile Numeric scalar specifying the quantile of the correlation distribution to use for computing the score, see \code{\link{classifySingleR}}.
#' @param allow.lost Deprecated.
#'
Expand Down Expand Up @@ -108,8 +108,8 @@ combineRecomputedResults <- function(
quantile=0.8,
assay.type.test="logcounts",
check.missing=TRUE,
allow.lost=FALSE,
warn.lost=TRUE,
allow.lost=FALSE,
num.threads = bpnworkers(BPPARAM),
BPPARAM=SerialParam())
{
Expand All @@ -133,8 +133,18 @@ combineRecomputedResults <- function(
.check_test_genes(test, curtrain)
}

# Applying the integration.
# Checking the genes.
all.refnames <- lapply(trained, function(x) rownames(x$ref))
if (warn.lost) {
intersected <- Reduce(intersect, all.refnames)
for (i in seq_along(trained)) {
if (!all(trained[[i]]$markers$unique %in% intersected)) {
warning("not all markers in 'trained' are available in each reference")
}
}
}

# Applying the integration.
universe <- Reduce(union, c(list(rownames(test)), all.refnames))
ibuilt <- train_integrated(
test_features=match(rownames(test), universe) - 1L,
Expand Down
3 changes: 0 additions & 3 deletions R/trainSingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,6 @@ trainSingleR <- function(
}
if (!is.null(test.genes)) {
ref <- DelayedArray(ref)[rownames(ref) %in% test.genes,,drop=FALSE]
if (nrow(ref) == 0L) {
stop("no common genes between 'test' and 'ref'")
}
}

if (.is_list(genes)) {
Expand Down
5 changes: 2 additions & 3 deletions man/classifySingleR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions man/combineRecomputedResults.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 7 additions & 13 deletions tests/testthat/test-recomputed.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ test <- .mockTestData(ref)
test <- scuttle::logNormCounts(test)

ref1 <- scuttle::logNormCounts(ref1)
train1 <- trainSingleR(ref1, labels=ref1$label)
train1 <- trainSingleR(ref1, labels=ref1$label, test.genes=rownames(test))
pred1 <- classifySingleR(test, train1)

ref2 <- scuttle::logNormCounts(ref2)
train2 <- trainSingleR(ref2, labels=ref2$label)
train2 <- trainSingleR(ref2, labels=ref2$label, test.genes=rownames(test))
pred2 <- classifySingleR(test, train2)

test_that("combineRecomputedResults works as expected (light check)", {
Expand Down Expand Up @@ -100,18 +100,12 @@ test_that("combineRecomputedResults handles mismatches to rows and cells", {
trained=list(train1, train2)), "not identical")
colnames(test) <- NULL

# Correctly reorders the gene universes.
ref <- combineRecomputedResults(
results=list(pred1, pred2),
test=test,
trained=list(train1, train2))

# Responds to mismatches in the genes.
s <- sample(nrow(test))
out <- combineRecomputedResults(
expect_error(combineRecomputedResults(
results=list(pred1, pred2),
test=test[s,],
trained=list(train1, train2))
expect_equal(ref, out)
trained=list(train1, train2)), "test.genes")
})

test_that("combineRecomputedResults emits warnings when missing genes are present", {
Expand All @@ -120,13 +114,13 @@ test_that("combineRecomputedResults emits warnings when missing genes are presen
rownames(ref1b)[1] <- "BLAH"
markers1 <- train1$markers$full
markers1$A$B <- c(markers1$A$B, "BLAH")
train1b <- trainSingleR(ref1b, labels=ref1$label, genes=markers1)
train1b <- trainSingleR(ref1b, labels=ref1$label, genes=markers1, test.genes=rownames(test))

ref2b <- ref2[c(1, seq_len(nrow(ref2))),]
rownames(ref2b)[1] <- "WHEE"
markers2 <- train2$markers$full
markers2$A$B <- c(markers2$a$b, "WHEE")
train2b <- trainSingleR(ref2b, labels=ref2$label, genes=markers2)
train2b <- trainSingleR(ref2b, labels=ref2$label, genes=markers2, test.genes=rownames(test))

expect_error(out <- combineRecomputedResults(
results=list(pred1, pred2),
Expand Down

0 comments on commit d1cd66f

Please sign in to comment.