Docfixes, mostly for the new link policy.

SingleR-inc · Jan 7, 2025 · e3a92fd · e3a92fd
1 parent 74e1f58
commit e3a92fd
Show file tree

Hide file tree

Showing 29 changed files with 94 additions and 96 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SingleR
 Title: Reference-Based Single-Cell RNA-Seq Annotation
-Version: 2.9.4
-Date: 2024-12-20
+Version: 2.9.5
+Date: 2025-01-07
 Authors@R: c(person("Dvir", "Aran", email="[email protected]", role=c("aut", "cph")),
     person("Aaron", "Lun", email="[email protected]", role=c("ctb", "cre")),
     person("Daniel", "Bunis", role="ctb"),

diff --git a/R/SingleR.R b/R/SingleR.R
@@ -4,13 +4,13 @@
 #' given a labelled reference dataset in the same feature space.
 #'
 #' @param test A numeric matrix of single-cell expression values where rows are genes and columns are cells.
-#' Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
+#' Alternatively, a \link[SummarizedExperiment]{SummarizedExperiment} object containing such a matrix.
 #' @inheritParams trainSingleR
 #' @param ref A numeric matrix of (usually normalized and log-transformed) expression values from a reference dataset,
-#' or a \linkS4class{SummarizedExperiment} object containing such a matrix;
+#' or a \link[SummarizedExperiment]{SummarizedExperiment} object containing such a matrix;
 #' see \code{\link{trainSingleR}} for details.
 #'
-#' Alternatively, a list or \linkS4class{List} of SummarizedExperiment objects or numeric matrices containing multiple references.
+#' Alternatively, a list or \link[S4Vectors]{List} of SummarizedExperiment objects or numeric matrices containing multiple references.
 #' Row names may be different across entries but only the intersection will be used, see Details.
 #' @param method Deprecated.
 #' @param clusters A character vector or factor of cluster identities for each cell in \code{test}.
@@ -19,18 +19,18 @@
 #' @param aggr.ref,aggr.args Arguments controlling the aggregation of the references prior to annotation, see \code{\link{trainSingleR}}.
 #' @param quantile,fine.tune,tune.thresh,prune Further arguments to pass to \code{\link{classifySingleR}}.
 #' @param assay.type.test An integer scalar or string specifying the assay of \code{test} containing the relevant expression matrix,
-#' if \code{test} is a \linkS4class{SummarizedExperiment} object.
+#' if \code{test} is a \link[SummarizedExperiment]{SummarizedExperiment} object.
 #' @param assay.type.ref An integer scalar or string specifying the assay of \code{ref} containing the relevant expression matrix,
-#' if \code{ref} is a \linkS4class{SummarizedExperiment} object (or is a list that contains one or more such objects).
+#' if \code{ref} is a \link[SummarizedExperiment]{SummarizedExperiment} object (or is a list that contains one or more such objects).
 #' @param check.missing.test Logical scalar indicating whether rows of \code{test} should be checked for missing values (and if found, removed).
 #' @param check.missing.ref Logical scalar indicating whether rows of \code{ref} should be checked for missing values (and if found, removed).
 #' @param check.missing Deprecated, use \code{check.missing.test} and \code{check.missing.ref} instead.
 #' @param num.threads Integer scalar specifying the number of threads to use for index building and classification.
 #' @param BNPARAM Deprecated and ignored.
-#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed in other steps,
+#' @param BPPARAM A \link[BiocParallel]{BiocParallelParam} object specifying how parallelization should be performed in other steps,
 #' see \code{?\link{trainSingleR}} and \code{?\link{classifySingleR}} for more details.
 #'
-#' @return A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell (one cell per row).
+#' @return A \link[S4Vectors]{DataFrame} is returned containing the annotation statistics for each cell (one cell per row).
 #' This is identical to the output of \code{\link{classifySingleR}}.
 #'
 #' @details

diff --git a/R/aggregateReference.R b/R/aggregateReference.R
@@ -4,13 +4,15 @@
 #' This can be done with varying degrees of resolution to preserve the within-label heterogeneity.
 #'
 #' @param ref A numeric matrix of reference expression values, usually containing log-expression values.
-#' Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
+#' Alternatively, a \link[SummarizedExperiment]{SummarizedExperiment} object containing such a matrix.
 #' @param labels A character vector or factor of known labels for all cells in \code{ref}.
 #' @param ncenters Integer scalar specifying the maximum number of aggregated profiles to produce for each label.
+#' If \code{NULL}, a suitable number of profiles is automatically chosen. 
 #' @param power Numeric scalar between 0 and 1 indicating how much aggregation should be performed, see Details.
+#' Ignored if \code{ncenters} is not \code{NULL}.
 #' @param rank Integer scalar specifying the number of principal components to use during clustering.
 #' @param assay.type An integer scalar or string specifying the assay of \code{ref} containing the relevant expression matrix,
-#' if \code{ref} is a \linkS4class{SummarizedExperiment} object.
+#' if \code{ref} is a \link[SummarizedExperiment]{SummarizedExperiment} object.
 #' @param ntop Integer scalar specifying the number of highly variable genes to use for the PCA step.
 #' @param subset.row Integer, character or logical vector indicating the rows of \code{ref} to use for k-means clustering. 
 #' @param check.missing Logical scalar indicating whether rows should be checked for missing values (and if found, removed).
@@ -31,7 +33,7 @@
 #' This reduces the number of separate observations (for speed) while preserving some level of population heterogeneity (for fidelity).
 #' 
 #' The number of pseudo-bulk samples per label is controlled by \code{ncenters}.
-#' By default, we set the number of clusters to \code{X^power} where \code{X} is the number of cells for that label.
+#' If \code{ncenters=NULL}, we set the number of clusters to \code{X^power} where \code{X} is the number of cells for that label.
 #' This ensures that labels with more cells have more resolved representatives.
 #' If \code{ncenters} is greater than the number of samples for a label and/or \code{power=1}, no aggregation is performed.
 #' Setting \code{power=0} will aggregate all cells of a label into a single pseudo-bulk profile.
@@ -53,7 +55,7 @@
 #' We do not use the median to avoid consistently obtaining zeros for lowly expressed genes.
 #' 
 #' @return
-#' A \linkS4class{SummarizedExperiment} object with a \code{"logcounts"} assay containing a matrix of aggregated expression values,
+#' A \link[SummarizedExperiment]{SummarizedExperiment} object with a \code{"logcounts"} assay containing a matrix of aggregated expression values,
 #' and a \code{label} column metadata field specifying the label corresponding to each column.
 #' 
 #' @author Aaron Lun

diff --git a/R/classifySingleR.R b/R/classifySingleR.R
@@ -5,22 +5,22 @@
 #' @param test A numeric matrix of single-cell expression values where rows are genes and columns are cells.
 #' Each row should be named with the gene name.
 #'
-#' Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
-#' @param trained A \linkS4class{List} containing the output of the \code{\link{trainSingleR}} function.
+#' Alternatively, a \link[SummarizedExperiment]{SummarizedExperiment} object containing such a matrix.
+#' @param trained A \link[S4Vectors]{List} containing the output of the \code{\link{trainSingleR}} function.
 #' If the row names of \code{test} are not exactly the same as the reference dataset, the call to \code{trainSingleR} should set \code{test.genes=rownames(test)}.
 #'
 #' Alternatively, a List of Lists produced by \code{\link{trainSingleR}} for multiple references.
 #' @param quantile A numeric scalar specifying the quantile of the correlation distribution to use to compute the score for each label.
 #' @param fine.tune A logical scalar indicating whether fine-tuning should be performed. 
 #' @param tune.thresh A numeric scalar specifying the maximum difference from the maximum correlation to use in fine-tuning.
 #' @param sd.thresh Deprecated and ignored.
-#' @param assay.type Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \linkS4class{SummarizedExperiment}.
+#' @param assay.type Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \link[SummarizedExperiment]{SummarizedExperiment}.
 #' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.
 #' @param prune A logical scalar indicating whether label pruning should be performed.
 #' @param num.threads Integer scalar specifying the number of threads to use for classification.
-#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying the parallelization scheme to use for \code{NA} scanning, when \code{check.missing=TRUE}.
+#' @param BPPARAM A \link[BiocParallel]{BiocParallelParam} object specifying the parallelization scheme to use for \code{NA} scanning, when \code{check.missing=TRUE}.
 #' 
-#' @return A \linkS4class{DataFrame} where each row corresponds to a cell in \code{test}.
+#' @return A \link[S4Vectors]{DataFrame} where each row corresponds to a cell in \code{test}.
 #' In the case of a single reference, this contains:
 #' \itemize{
 #' \item \code{scores}, a numeric matrix of correlations at the specified \code{quantile} for each label (column) in each cell (row).
@@ -33,7 +33,7 @@
 #' Only added if \code{prune=TRUE}.
 #' }
 #'
-#' The \code{\link{metadata}} of the DataFrame contains:
+#' The \code{\link[S4Vectors]{metadata}} of the DataFrame contains:
 #' \itemize{
 #' \item \code{common.genes}, a character vector of genes used to compute the correlations prior to fine-tuning.
 #' \item \code{de.genes}, a list of lists of character vectors, containing the genes used to distinguish between each pair of labels.

diff --git a/R/combineRecomputedResults.R b/R/combineRecomputedResults.R
@@ -1,12 +1,12 @@
 #' Combine SingleR results with recomputation
 #'
-#' Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \linkS4class{DataFrame}.
+#' Combine results from multiple runs of \code{\link{classifySingleR}} (usually against different references) into a single \link[S4Vectors]{DataFrame}.
 #' This involves recomputing the scores so that they are comparable across references.
 #'
-#' @param results A list of \linkS4class{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.
+#' @param results A list of \link[S4Vectors]{DataFrame} prediction results as returned by \code{\link{classifySingleR}} when run on each reference separately.
 #' @inheritParams SingleR
 #' @param check.missing Deprecated and ignored, as any row filtering will cause mismatches with the \code{test.genes=} used in \code{\link{trainSingleR}}.
-#' @param trained A list of \linkS4class{List}s containing the trained outputs of multiple references,
+#' @param trained A list of \link[S4Vectors]{List}s containing the trained outputs of multiple references,
 #' equivalent to either (i) the output of \code{\link{trainSingleR}} on multiple references with \code{recompute=TRUE},
 #' or (ii) running \code{trainSingleR} on each reference separately and manually making a list of the trained outputs.
 #' @param warn.lost Logical scalar indicating whether to emit a warning if markers from one reference in \code{trained} are absent in other references.
@@ -15,7 +15,7 @@
 #' @param tune.thresh A numeric scalar specifying the maximum difference from the maximum correlation to use in fine-tuning.
 #' @param allow.lost Deprecated.
 #'
-#' @return A \linkS4class{DataFrame} is returned containing the annotation statistics for each cell or cluster (row).
+#' @return A \link[S4Vectors]{DataFrame} is returned containing the annotation statistics for each cell or cluster (row).
 #' This mimics the output of \code{\link{classifySingleR}} and contains the following fields:
 #' \itemize{
 #' \item \code{scores}, a DataFrame of DataFrames containing the \emph{recomputed} scores for the best label in each reference.
@@ -28,9 +28,6 @@
 #' }
 #' It may also contain \code{pruned.labels} if these were also present in \code{results}.
 #'
-#' The \code{\link{metadata}} contains \code{label.origin}, 
-#' a DataFrame specifying the reference of origin for each label in \code{scores}.
-#'
 #' @details
 #' Here, the strategy is to perform classification separately within each reference, 
 #' then collate the results to choose the label with the highest score across references.

diff --git a/R/datasets.R b/R/datasets.R
@@ -5,7 +5,7 @@
 #'
 #' @param ... Further arguments to pass to the \pkg{celldex} function of the same name.
 #'
-#' @return A \linkS4class{SummarizedExperiment} object containing the reference dataset.
+#' @return A \link[SummarizedExperiment]{SummarizedExperiment} object containing the reference dataset.
 #'
 #' @author Aaron Lun
 #' 

diff --git a/R/getClassicMarkers.R b/R/getClassicMarkers.R
@@ -7,7 +7,7 @@
 #' @param de.n An integer scalar specifying the number of DE genes to use.
 #' Defaults to \code{500 * (2/3) ^ log2(N)} where \code{N} is the number of unique labels.
 #' @param num.threads Integer scalar specifying the number of threads to use.
-#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying how parallelization should be performed.
+#' @param BPPARAM A \link[BiocParallel]{BiocParallelParam} object specifying how parallelization should be performed.
 #'
 #' @return
 #' A list of lists of character vectors, 

diff --git a/R/matchReferences.R b/R/matchReferences.R
@@ -4,7 +4,7 @@
 #' but with differences in nomenclature.
 #' 
 #' @param ref1,ref2 Numeric matrices of single-cell (usually normalized and log-transformed) expression values where rows are genes and columns are cells.
-#' Alternatively, \linkS4class{SummarizedExperiment} objects containing such matrices.
+#' Alternatively, \link[SummarizedExperiment]{SummarizedExperiment} objects containing such matrices.
 #' @param labels1,labels2 A character vector or factor of known labels for all cells in \code{ref1} and \code{ref2}, respectively.
 #' @param ... Further arguments to pass to \code{\link{SingleR}}.
 #'

diff --git a/R/mockData.R b/R/mockData.R
@@ -6,16 +6,16 @@
 #' @param nreps Integer scalar specifying the number of replicates per group.
 #' @param ngenes Integer scalar specifying the number of genes in the dataset.
 #' @param prop Numeric scalar specifying the proportion of genes that are DE between groups.
-#' @param mock.ref A \linkS4class{SummarizedExperiment} object produced by \code{.mockRefData}.
+#' @param mock.ref A \link[SummarizedExperiment]{SummarizedExperiment} object produced by \code{.mockRefData}.
 #' @param ncells Integer scalar specifying the number of cells to simulate.
 #'
 #' @details
 #' These functions are simply provided to simulate some data in the Examples of the documentation.
 #' The simulations are very simple and should not be used for performance comparisons.
 #'
 #' @return 
-#' Both functions return a \linkS4class{SummarizedExperiment} object containing simulated counts in the \code{counts} assay,
-#' with the group assignment of each sample in the \code{"label"} field of the \code{\link{colData}}.
+#' Both functions return a \link[SummarizedExperiment]{SummarizedExperiment} object containing simulated counts in the \code{counts} assay,
+#' with the group assignment of each sample in the \code{"label"} field of the \code{\link[SummarizedExperiment]{colData}}.
 #' 
 #' @author Aaron Lun
 #' @examples

diff --git a/R/plotMarkerHeatmap.R b/R/plotMarkerHeatmap.R
@@ -2,15 +2,15 @@
 #'
 #' Create a heatmap of the log-normalized expression for the most interesting markers of a particular label.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
+#' @param results A \link[S4Vectors]{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
 #' @param test A numeric matrix of log-normalized expression values where rows are genes and columns are cells.
 #' Each row should be named with the same gene name that was used to compute \code{results}.
 #'
-#' Alternatively, a \linkS4class{SummarizedExperiment} object containing such a matrix.
+#' Alternatively, a \link[SummarizedExperiment]{SummarizedExperiment} object containing such a matrix.
 #' @param label String specifying the label of interest.
 #' @param other.labels Character vector specifying the other labels to be compared to \code{label} when finding interesting markers.
 #' Defaults to all available labels.
-#' @param assay.type Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \linkS4class{SummarizedExperiment}.
+#' @param assay.type Integer scalar or string specifying the matrix of expression values to use if \code{test} is a \link[SummarizedExperiment]{SummarizedExperiment}.
 #' @param use.pruned Logical scalar indicating whether the pruned labels should be used instead.
 #' @param order.by.effect String specifying the effect size from \code{\link[scrapper]{scoreMarkers}} with which to sort for interesting markers.
 #' @param order.by.summary String specifying the summary statistic from \code{\link[scrapper]{scoreMarkers}} with which to sort for interesting markers.

diff --git a/R/plotScoreDistribution.R b/R/plotScoreDistribution.R
@@ -2,7 +2,7 @@
 #'
 #' Plot the distribution of assignment scores across all cells assigned to each reference label.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
+#' @param results A \link[S4Vectors]{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
 #' @param show Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}.
 #' @param labels.use Character vector specifying the labels to show in the plot facets.
 #' Defaults to all labels in \code{results}.

diff --git a/R/plotScoreHeatmap.R b/R/plotScoreHeatmap.R
@@ -2,7 +2,7 @@
 #'
 #' Create a heatmap of the \code{\link{SingleR}} assignment scores across all cell-label combinations.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
+#' @param results A \link[S4Vectors]{DataFrame} containing the output from \code{\link{SingleR}}, \code{\link{classifySingleR}}, or \code{\link{combineRecomputedResults}}.
 #' @param cells.use Integer or string vector specifying the single cells (i.e., rows of \code{results}) to show.
 #' If \code{NULL}, all cells are shown.
 #' @param labels.use Character vector specifying the labels to show in the heatmap rows.

diff --git a/R/pruneScores.R b/R/pruneScores.R
@@ -2,7 +2,7 @@
 #'
 #' Remove low-quality assignments based on the cell-label score matrix returned by \code{\link{classifySingleR}}.
 #'
-#' @param results A \linkS4class{DataFrame} containing the output generated by \code{\link{SingleR}} or \code{\link{classifySingleR}}.
+#' @param results A \link[S4Vectors]{DataFrame} containing the output generated by \code{\link{SingleR}} or \code{\link{classifySingleR}}.
 #' @param nmads Numeric scalar specifying the number of MADs to use for defining low outliers in the per-label distribution of delta values (i.e., difference from median).
 #' @param min.diff.med Numeric scalar specifying the minimum acceptable delta for each cell.
 #' @param min.diff.next Numeric scalar specifying the minimum difference between the best score and the next best score in fine-tuning.