Skip to content

Commit

Permalink
Inch a bit closer.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Sep 6, 2024
1 parent 81c860e commit 3faaf82
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 61 deletions.
33 changes: 5 additions & 28 deletions R/SingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,34 +102,10 @@ SingleR <- function(
bpstart(BPPARAM)
on.exit(bpstop(BPPARAM))
}
test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)

# Converting to a common list format for ease of data munging.
if (single.ref <- !.is_list(ref)) {
ref <- list(ref)
}

ref <- lapply(ref, FUN=.to_clean_matrix, assay.type=assay.type.ref,
check.missing=check.missing, msg="ref", BPPARAM=BPPARAM)
refnames <- Reduce(intersect, lapply(ref, rownames))

keep <- intersect(rownames(test), refnames)
if (length(keep) == 0) {
stop("no common genes between 'test' and 'ref'")
}
if (!identical(keep, rownames(test))) {
test <- test[keep,]
}
for (i in seq_along(ref)) {
if (!identical(keep, rownames(ref[[i]]))) {
ref[[i]] <- ref[[i]][keep,,drop=FALSE]
}
}

# Converting back.
if (single.ref) {
ref <- ref[[1]]
}
# We have to clean it at the start to remove NAs before we do the build,
# otherwise 'test.genes' won't match up to the filtered 'test'.
test <- .to_clean_matrix(test, assay.type.test, check.missing, msg="test", BPPARAM=BPPARAM)

trained <- trainSingleR(
ref,
Expand All @@ -143,7 +119,8 @@ SingleR <- function(
aggr.args = aggr.args,
recompute=recompute,
restrict = restrict,
check.missing=FALSE,
test.genes=rownames(test),
check.missing=check.missing,
BNPARAM=BNPARAM,
num.threads = num.threads,
BPPARAM=BPPARAM
Expand Down
12 changes: 8 additions & 4 deletions R/classifySingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ classifySingleR <- function(

results <- vector("list", length(trained))
for (l in seq_along(results)) {
trained[[l]] <- .classify_internals(
results[[l]] <- .classify_internals(
test=test,
trained=trained[[l]],
quantile=quantile,
fine.tune=fine.tune,
tune.thresh=tune.thresh,
Expand All @@ -134,16 +135,19 @@ classifySingleR <- function(
}
}

#' @importFrom S4Vectors DataFrame metadata metadata<- I
.classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
.check_test_genes <- function(test, trained) {
if (!is.null(trained$options$test.genes)) {
if (!identical(trained$options$test.genes, rownames(test))) {
stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trainSingleR'")
stop("expected 'rownames(test)' to be the same as 'test.genes' in 'trained'")
}
} else if (nrow(trained$ref) != nrow(test)) {
stop("expected 'test' to have the same number of rows as the reference dataset")
}
}

#' @importFrom S4Vectors DataFrame metadata metadata<- I
.classify_internals <- function(test, trained, quantile, fine.tune, tune.thresh=0.05, prune=TRUE, num.threads=1) {
.check_test_genes(test, trained)
trained <- rebuildIndex(trained, num.threads = num.threads)

parsed <- initializeCpp(test)
Expand Down
60 changes: 33 additions & 27 deletions R/combineRecomputedResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
#'
#' @export
#' @importFrom S4Vectors DataFrame metadata<-
#' @importFrom beachmat initializeCpp
combineRecomputedResults <- function(
results,
test,
Expand All @@ -112,54 +113,59 @@ combineRecomputedResults <- function(
num.threads = bpnworkers(BPPARAM),
BPPARAM=SerialParam())
{
all.names <- c(list(colnames(test)), lapply(results, rownames))
if (length(unique(all.names)) != 1) {
stop("cell/cluster names in 'results' are not identical")
}
all.nrow <- c(ncol(test), vapply(results, nrow, 0L))
if (length(unique(all.nrow)) != 1) {
stop("numbers of cells/clusters in 'results' are not identical")
}
test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM)

# Checking the marker consistency.
all.refnames <- lapply(trained, function(x) rownames(x$ref))
intersected <- Reduce(intersect, all.refnames)
for (i in seq_along(trained)) {
if (!all(trained[[i]]$markers$unique %in% rownames(test))) {
stop("all markers stored in 'results' should be present in 'test'")
} else if (warn.lost && !all(trained[[i]]$markers$unique %in% intersected)) {
warning("entries of 'trained' differ in the universe of available markers")
# Applying the sanity checks.
stopifnot(length(results) == length(trained))
for (i in seq_along(results)) {
curres <- results[[i]]
if (ncol(test) != nrow(curres)) {
stop("numbers of cells/clusters in 'results' are not identical")
}
if (!identical(rownames(curres), colnames(test))) {
stop("cell/cluster names in 'results' are not identical")
}

curtrain <- trained[[i]]
if (!all(curres$labels %in% curtrain$labels$unique)) {
stop("not all labels in 'results' are present in 'trained'")
}
.check_test_genes(test, curtrain)
}

# Applying the integration.
all.refnames <- lapply(trained, function(x) rownames(x$ref))
universe <- Reduce(union, c(list(rownames(test)), all.refnames))
ibuilt <- integrate_build(
match(rownames(test), universe) - 1L,
lapply(trained, function(x) initializeCpp(x$ref)),
lapply(trained, function(x) match(rownames(x$ref), universe) - 1L),
lapply(trained, function(x) match(x$labels$full, x$labels$unique) - 1L),
lapply(trained, function(x) x$built),
ibuilt <- train_integrated(
test_features=match(rownames(test), universe) - 1L,
references=lapply(trained, function(x) initializeCpp(x$ref)),
ref_ids=lapply(all.refnames, function(x) match(x, universe) - 1L),
labels=lapply(trained, function(x) match(x$labels$full, x$labels$unique) - 1L),
prebuilt=lapply(trained, function(x) rebuildIndex(x)$built),
nthreads = num.threads
)

test <- .to_clean_matrix(test, assay.type=assay.type.test, check.missing=check.missing, msg="test", BPPARAM=BPPARAM)
collated <- vector("list", length(trained))
for (i in seq_along(collated)) {
collated[[i]] <- match(results[[i]]$labels, trained[[i]]$labels$unique) - 1L
}

parsed <- initializeCpp(test)
irun <- integrate_run(parsed, collated, ibuilt, quantile = quantile, nthreads = num.threads)
scores <- irun$scores
irun <- classify_integrated(
test=parsed,
results=collated,
integrated_build=ibuilt,
quantile=quantile,
nthreads=num.threads
)

# Organizing the outputs.
base.scores <- vector("list", length(results))
for (r in seq_along(base.scores)) {
mat <- results[[r]]$scores
mat[] <- NA_real_
idx <- cbind(seq_len(nrow(mat)), collated[[r]] + 1L)
mat[idx] <- scores[,r]
mat[idx] <- irun$scores[,r]
base.scores[[r]] <- mat
}

Expand Down
3 changes: 3 additions & 0 deletions R/trainSingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@ trainSingleR <- function(
if (!is.null(test.genes)) {
ref <- DelayedArray(ref)[rownames(ref) %in% test.genes,,drop=FALSE]
}
if (nrow(ref) == 0L) {
stop("no genes available for marker detection in the reference dataset")
}

if (.is_list(genes)) {
is.char <- vapply(genes, is.character, TRUE)
Expand Down
4 changes: 2 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
setAutoBPPARAM(BPPARAM)
on.exit(setAutoBPPARAM(old))

x <- DelayedArray(x)
y <- DelayedArray(x)
discard <- rowAnyNAs(x)
if (any(discard)) {
x <- x[!discard,,drop=FALSE]
x <- y[!discard,,drop=FALSE]
}
}

Expand Down

0 comments on commit 3faaf82

Please sign in to comment.