From 40f23329f380ec65d6173f2f5d39fc43b8382336 Mon Sep 17 00:00:00 2001
From: Artur-man
Date: Thu, 2 Jan 2025 15:11:47 +0100
Subject: [PATCH] update niche clustering tutorial and fix a bug in hot spot
analysis
---
R/spatial.R | 1 +
docs/_site.yml | 2 +-
docs/conversion.html | 2 +-
docs/importingdata.html | 2 +-
docs/index.html | 2 +-
docs/interactive.Rmd | 2 +-
docs/interactive.html | 4 +-
docs/moleculeanalysis.html | 2 +-
docs/multiomic.html | 2 +-
...{deconvolution.Rmd => nicheclustering.Rmd} | 81 +++++--
...econvolution.html => nicheclustering.html} | 200 +++++++++++-------
docs/ondisk.html | 2 +-
docs/pixelanalysis.html | 2 +-
docs/registration.html | 2 +-
docs/roianalysis.html | 2 +-
docs/spotanalysis.html | 2 +-
docs/tutorials.Rmd | 2 +-
docs/tutorials.html | 4 +-
docs/voltronobjects.html | 2 +-
19 files changed, 204 insertions(+), 114 deletions(-)
rename docs/{deconvolution.Rmd => nicheclustering.Rmd} (63%)
rename docs/{deconvolution.html => nicheclustering.html} (78%)
diff --git a/R/spatial.R b/R/spatial.R
index 85c1ee4..95a1d1c 100644
--- a/R/spatial.R
+++ b/R/spatial.R
@@ -355,6 +355,7 @@ getHotSpotAnalysis <- function(object, assay = NULL, method = "Getis-Ord", featu
# calculate getis ord
n <- length(statistic)
+ statistic <- statistic - (min(statistic)) ### correct for negative scores, correct for this later
getisord_stat <- adj_matrix %*% statistic
getisord_stat <- getisord_stat/(sum(statistic) - statistic)
getisord[[1]] <- getisord_stat[,1]
diff --git a/docs/_site.yml b/docs/_site.yml
index e43468c..4e88edf 100644
--- a/docs/_site.yml
+++ b/docs/_site.yml
@@ -14,7 +14,7 @@ navbar:
- text: "Multi-omic Integration"
href: multiomic.html
- text: "Niche Clustering"
- href: deconvolution.html
+ href: nicheclustering.html
- text: "Downstream Analysis"
menu:
- text: "ROI Analysis"
diff --git a/docs/conversion.html b/docs/conversion.html
index 29d361e..4736775 100644
--- a/docs/conversion.html
+++ b/docs/conversion.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/importingdata.html b/docs/importingdata.html
index aefb67d..d1589e6 100644
--- a/docs/importingdata.html
+++ b/docs/importingdata.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/index.html b/docs/index.html
index 868a921..411881b 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -264,7 +264,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/interactive.Rmd b/docs/interactive.Rmd
index 365f5db..d0162d7 100644
--- a/docs/interactive.Rmd
+++ b/docs/interactive.Rmd
@@ -37,7 +37,7 @@ knitr::opts_chunk$set(highlight = TRUE, echo = TRUE)
# Interactive Annotation
-**VoltRon** includes interactive applications to select and manually label spatial points by drawing polygons and circles. As an example, we will use a Spot-based spatial transcriptomic assay, specifically the **Mouse Brain Serial Section 1/2** datasets, analyzed in the [Niche Clustering](deconvolution.html) tutorial. You can find the already analyzed data stored as a VoltRon object [here](https://bimsbstatic.mdc-berlin.de/landthaler/VoltRon/NicheClustering/Visium&Visium_data_decon_analyzed.rds)
+**VoltRon** includes interactive applications to select and manually label spatial points by drawing polygons and circles. As an example, we will use a Spot-based spatial transcriptomic assay, specifically the **Mouse Brain Serial Section 1/2** datasets, analyzed in the [Niche Clustering](nicheclustering.html) tutorial. You can find the already analyzed data stored as a VoltRon object [here](https://bimsbstatic.mdc-berlin.de/landthaler/VoltRon/NicheClustering/Visium&Visium_data_decon_analyzed.rds)
```{r class.source="watch-out", eval = FALSE}
MBrain_Sec <- readRDS("Visium&Visium_data_decon_analyzed.rds")
diff --git a/docs/interactive.html b/docs/interactive.html
index 21d9c44..e2c0652 100644
--- a/docs/interactive.html
+++ b/docs/interactive.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
@@ -464,7 +464,7 @@ Interactive Annotation
and manually label spatial points by drawing polygons and circles. As an
example, we will use a Spot-based spatial transcriptomic assay,
specifically the Mouse Brain Serial Section 1/2
-datasets, analyzed in the Niche
+datasets, analyzed in the Niche
Clustering tutorial. You can find the already analyzed data stored
as a VoltRon object here
diff --git a/docs/moleculeanalysis.html b/docs/moleculeanalysis.html
index ab4e07d..e3e7447 100644
--- a/docs/moleculeanalysis.html
+++ b/docs/moleculeanalysis.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/multiomic.html b/docs/multiomic.html
index b114bd9..f176827 100644
--- a/docs/multiomic.html
+++ b/docs/multiomic.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/deconvolution.Rmd b/docs/nicheclustering.Rmd
similarity index 63%
rename from docs/deconvolution.Rmd
rename to docs/nicheclustering.Rmd
index 148104c..10ec0fb 100644
--- a/docs/deconvolution.Rmd
+++ b/docs/nicheclustering.Rmd
@@ -37,9 +37,9 @@ knitr::opts_chunk$set(highlight = TRUE, echo = TRUE)
# Spot-based Niche Clustering
-Spot-based spatial transcriptomic assays capture spatially-resolved gene expression profiles that are somewhat closer to single cell resolution. However, each spot still include a few number of cells that are likely from a combination of cell types within the tissue of origin. RNA deconvolution is then incorporated to estimate the percentage/abundance of these cell types within each spot/ROI given a reference scRNAseq dataset.
+Spot-based spatial transcriptomic assays capture spatially-resolved gene expression profiles that are somewhat closer to single cell resolution. However, each spot still includes a few number of cells that are likely originated from few number of cell types, hence transcriptomic profile of each spot would likely include markers from multiple cell types. Here, **RNA deconvolution** can be incorporated to estimate the percentage/abundance of cell types for each spot. We use a scRNAseq dataset as a reference to computationally estimate the relative abundance of cell types across across the spots.
-VoltRon includes wrapper commands for using popular RNA deconvolution methods such as [RCTD](https://www.nature.com/articles/s41587-021-00830-w) (**spot**) and return estimated abundances as additional assays within each layer. These estimated percentages of cell types each spot could be incorporated to detect **niches** (i.e. small local microenvironments of cells) within the tissue. We can process cell type abundance assays and used them for clustering to detect these niches.
+VoltRon includes wrapper commands for using popular spot-level RNA deconvolution methods such as [RCTD](https://www.nature.com/articles/s41587-021-00830-w) and return estimated abundances as additional feature sets within each layer. These estimated percentages of cell types for each spot could be incorporated to detect **niches** (i.e. small local microenvironments of cells) within the tissue. We can process cell type abundance assays and used them for clustering to detect these niches.
@@ -47,7 +47,7 @@ VoltRon includes wrapper commands for using popular RNA deconvolution methods su
For this tutorial we will analyze spot-based transcriptomic assays from Mouse Brain generated by the [Visium](https://www.10xgenomics.com/products/spatial-gene-expression) instrument. The **Mouse Brain Serial Section 1/2** datasets can be downloaded from [here](https://www.10xgenomics.com/resources/datasets?menu%5Bproducts.name%5D=Spatial%20Gene%20Expression&query=&page=1&configure%5BhitsPerPage%5D=50&configure%5BmaxValuesPerFacet%5D=1000) (specifically, please filter for **Species=Mouse**, **AnatomicalEntity=brain**, **Chemistry=v1** and **PipelineVersion=v1.1.0**).
-We will now import each of four samples separately and merge them into one VoltRon object. There are four sections in total given two serial anterior and serial posterior sections, hence we have **two tissue blocks each having two layers**.
+We will now import each of four samples separately and merge them into one VoltRon object. There are four brain tissue sections in total given two serial anterior and serial posterior sections, hence we have **two tissue blocks each having two layers**.
```{r eval = FALSE, class.source="watch-out"}
library(VoltRon)
@@ -85,11 +85,16 @@ vrSpatialFeaturePlot(MBrain_Sec, features = "Count", crop = TRUE, alpha = 1, nco
## Import scRNA data
-We will now import the scRNA data for reference which can be downloaded from [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1). Specifically, we will use a scRNA reference of Mouse cortical adult brain with 14,000 cells from the Allen Institute, generated with the SMART-Seq2 protocol. This scRNA data is also used by the Spatial Data Analysis tutorial in [Seurat website](https://satijalab.org/seurat/articles/spatial_vignette.html#integration-with-single-cell-data).
-
-Before deconvoluting Visium spots, we correct cell types labels and drop some cell types with extremely few number of cells (e.g. "CR").
+We will now import the scRNA data for reference which can be downloaded from [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1). Specifically, we will use a scRNA data of Mouse cortical adult brain with 14,000 cells, generated with the SMART-Seq2 protocol, from the Allen Institute. This scRNA data is also used by the Spatial Data Analysis tutorial in [Seurat](https://satijalab.org/seurat/articles/spatial_vignette.html#integration-with-single-cell-data) website.
```{r eval = FALSE, class.source="watch-out"}
+# install packages if necessary
+if(!requireNamespace("Seurat"))
+ install.packages("Seurat")
+if(!requireNamespace("dplyr"))
+ install.packages("dplyr")
+
+# import scRNA data
library(Seurat)
allen_reference <- readRDS("allen_cortex.rds")
@@ -98,7 +103,11 @@ library(dplyr)
allen_reference <- SCTransform(allen_reference, ncells = 3000, verbose = FALSE) %>%
RunPCA(verbose = FALSE) %>%
RunUMAP(dims = 1:30)
+```
+
+Before deconvoluting Visium spots, we correct cell types labels and drop some cell types with extremely few number of cells (e.g. "CR").
+```{r eval = FALSE, class.source="watch-out"}
# update labels and subset
allen_reference$subclass <- gsub("L2/3 IT", "L23 IT", allen_reference$subclass)
allen_reference <- allen_reference[,colnames(allen_reference)[!allen_reference@meta.data$subclass %in% "CR"]]
@@ -117,14 +126,14 @@ gsubclass | gclass
## Spot Deconvolution with RCTD
-In order to integrate the scRNA data and the Visium data sets within the VoltRon objects, we will use **RCTD** algorithm which is accessible with the [spacexr](https://github.com/dmcable/spacexr) package.
+In order to integrate the scRNA data and the spatial data sets within the VoltRon object and estimate relative cell type abundances for each Visium spot, we will use **RCTD** algorithm which is accessible with the [spacexr](https://github.com/dmcable/spacexr) package.
```{r eval = FALSE, class.source="watch-out"}
-if(requireNamespace("spacexr"))
+if(!requireNamespace("spacexr"))
devtools::install_github("dmcable/spacexr", build_vignettes = FALSE)
```
-After running **getDeconvolution**, an additional assay within the same layer with **_decon** postfix will be created for all layers with a Visium data.
+After running **getDeconvolution**, an additional feature set within the same Visium assay with name **Decon** will be created.
```{r eval = FALSE, class.source="watch-out"}
library(spacexr)
@@ -156,7 +165,7 @@ vrFeatures(MBrain_Sec)
[19] "Sncg" "Sst" "Vip" "VLMC"
```
-These features (i.e. cell type abundances) can be visualized like all other feature types.
+These features (i.e. cell type abundances) can be visualized like any other feature type.
```{r eval = FALSE, class.source="watch-out"}
vrSpatialFeaturePlot(MBrain_Sec, features = c("L4", "L5 PT", "Oligo", "Vip"),
@@ -169,17 +178,18 @@ vrSpatialFeaturePlot(MBrain_Sec, features = c("L4", "L5 PT", "Oligo", "Vip"),
## Clustering
-Relative cell type abundances that are learned by RCTD and stored within VoltRon can now be used to cluster spots. These groups or clusters of spots can also be realized as **niches**, regions within tissue sections that have a distinct cell type composition compared to the remaining parts of the tissue.
+Relative cell type abundances that are learned by RCTD and stored within VoltRon can now be used to cluster spots. These groups or clusters of spots can often be referred to as **niches**. Here, as a definition, a niche is a region or a collection of regions within tissue that have a distinct cell type composition as opposed to the remaining parts of the tissue.
-We will now normalize and process relative cell type abundances to learn/detect these niche clusters. We treat cell type abundances as [compositional data](https://en.wikipedia.org/wiki/Compositional_data), hence we incorporate **centred log ratio (CLR)** transformation for normalizing them.
+The cell type abundances (which adds up to one for each spot) can be normalized and processed like transcriptomic and proteomic profiles prior to clustering (i.e. niche clustering). We treat cell type abundances as [compositional data](https://en.wikipedia.org/wiki/Compositional_data), hence we incorporate **centred log ratio (CLR)** transformation for normalizing them.
```{r eval = FALSE, class.source="watch-out"}
+vrMainFeatureType(MBrain_Sec) <- "Decon"
MBrain_Sec <- normalizeData(MBrain_Sec, method = "CLR")
```
-The CLR normalized assay have only 25 features, each representing a cell type from the single cell reference data. Hence, we can **directly calculate UMAP reductions from this feature set** without relying on a prior dimensionality reduction such as PCA.
+The CLR normalized assay have only 25 features, each representing a cell type from the single cell reference data. Hence, we can **directly calculate UMAP reductions from this feature abundances** since we dont have much number of features which necessitates dimensionality reduction such as PCA.
-VoltRon is also capable of calculating the UMAP reduction from normalized data slots. Hence, we build a UMAP reduction from CLR data. However, UMAP will always be calculated from a PCA reduction by default (if a PCA embedding is found in the object).
+However, we may still need to reduce the dimensionality of this space with 25 features using UMAP for visualizing purposes. VoltRon is also capable of calculating the UMAP reduction from normalized data slots. Hence, we build a UMAP reduction from CLR data directly. However, UMAP will always be calculated from a PCA reduction by default (if a PCA embedding is found in the object).
```{r eval = FALSE, class.source="watch-out"}
MBrain_Sec <- getUMAP(MBrain_Sec, data.type = "norm")
@@ -206,7 +216,7 @@ VoltRon incorporates distinct plotting functions for, e.g. embeddings, coordinat
```{r eval = FALSE, class.source="watch-out"}
# visualize
g1 <- vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "Sample")
-g2 <- vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "clusters", label = TRUE)
+g2 <- vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "niche_clusters", label = TRUE)
g1 | g2
```
@@ -214,10 +224,10 @@ g1 | g2
-Mapping clusters on the spatial images and spots would show the niche structure across the tissues and layers.
+Mapping clusters on the spatial images and spots would show the niche structure across all four tissue sections.
```{r eval = FALSE, class.source="watch-out"}
-vrSpatialPlot(MBrain_Sec, group.by = "clusters", crop = TRUE, alpha = 1)
+vrSpatialPlot(MBrain_Sec, group.by = "niche_clusters", crop = TRUE, alpha = 1)
```
@@ -227,8 +237,13 @@ vrSpatialPlot(MBrain_Sec, group.by = "clusters", crop = TRUE, alpha = 1)
We use **vrHeatmapPlot** to investigate relative cell type abundances across these niche clusters. You will need to have **ComplexHeatmap** package in your namespace.
```{r eval = FALSE, class.source="watch-out"}
+# install packages if necessary
+if(!requireNamespace("ComplexHeatmap"))
+ BiocManager::install("ComplexHeatmap")
+
+# heatmap of niches
library(ComplexHeatmap)
-vrHeatmapPlot(MBrain_Sec, features = vrFeatures(MBrain_Sec), group.by = "clusters",
+vrHeatmapPlot(MBrain_Sec, features = vrFeatures(MBrain_Sec), group.by = "niche_clusters",
show_row_names = T, show_heatmap_legend = T)
```
@@ -237,14 +252,21 @@ vrHeatmapPlot(MBrain_Sec, features = vrFeatures(MBrain_Sec), group.by = "cluster
# Cell-based Niche Clustering
-Unlike spot-based assays, spatial transcriptomics data in single cell resolution require building niche assays to detect clusters that are equivalent to the niches that each individual cell belongs to. We first detect the mixture of cells around a spatial neighborhood around each of these cells and use that a profile to perform clustering.
+Similar to spot-based spatial omics assays, we can build and cluster niche associated to each cell for spatial transcriptomics datasets in single cell resolution. For this, we require building niche assays for the collections of cells where a niche of cell is defined as a region of sets of regions with distinct cell type population that each of these cells belong to.
+
+Here, we dont require any scRNA reference dataset but we may first need to cluster and annotate cells in the RNA/transcriptome level profiles, and determine cell types. Then, we first detect the mixture of cell types within a spatial neighborhood around all cells and use that as a profile to perform clustering where these clusters will be associated with niches.
## Import Xenium Data
-For this, the data has to be already clustered (and annotated if possible). We will use the cluster labels generated at the end of the Xenium analysis section of workflow from [Cell/Spot Analysis](spotanalysis.html). You can download the VoltRon object with clustered and annotated Xenium cells along with the Visium assay from [here](https://bimsbstatic.mdc-berlin.de/landthaler/VoltRon/SpatialDataAlignment/Xenium_vs_Visium/VRBlock_data_clustered.rds).
+For this, the data has to be already clustered (and annotated if possible). We will use the cluster labels generated at the end of the Xenium analysis workflow from [Cell/Spot Analysis](spotanalysis.html). You can download the VoltRon object with clustered and annotated Xenium cells along with the Visium assay from [here](https://bimsbstatic.mdc-berlin.de/landthaler/VoltRon/SpatialDataAlignment/Xenium_vs_Visium/VRBlock_data_clustered.rds).
```{r eval = FALSE, class.source="watch-out"}
Xen_data <- readRDS("VRBlock_data_clustered.rds")
+```
+
+We will use all these 18 cell types used for annotating Xenium cells for detecting niches with distinct cellular type mixtures.
+
+```{r eval = FALSE, class.source="watch-out"}
vrMainSpatial(Xen_data, assay = "Assay1") <- "main"
vrMainSpatial(Xen_data, assay = "Assay3") <- "main"
vrSpatialPlot(Xen_data, group.by = "CellType", pt.size = 0.13, background.color = "black",
@@ -257,7 +279,7 @@ vrSpatialPlot(Xen_data, group.by = "CellType", pt.size = 0.13, background.color
## Creating Niche Assay
-Here, in order to calculate niche profiles for each cell, we have to first build spatial neighborhoods around cells and capture the local cell type mixtures. Using **getSpatialNeighbors**, we build a spatial neighborhood graph to connect all cells to other cells within at most 15 distance apart
+For calculating niche profiles for each cell, we have to first build spatial neighborhoods around cells and capture the local cell type mixtures. Using **getSpatialNeighbors**, we build a spatial neighborhood graph to connect all cells to other cells within at most 15 distance apart.
```{r eval = FALSE, class.source="watch-out"}
Xen_data <- getSpatialNeighbors(Xen_data, radius = 15, method = "radius")
@@ -294,10 +316,15 @@ vrMainFeatureType(Xen_data) <- "Niche"
Xen_data <- normalizeData(Xen_data, method = "CLR")
```
-Default clustering functions could be used to analyze the normalized niche profiles of cells to detect niches associated with each cell. However, we use K-means algorithm to perform the niche clustering.
+Default clustering functions could be used to analyze the normalized niche profiles of cells to detect niches associated with each cell. However, we use K-means algorithm to perform the niche clustering. For this exercise, we pick an estimate of 7 clusters which will be the number of niche clusters we get.
```{r eval = FALSE, class.source="watch-out"}
Xen_data <- getClusters(Xen_data, nclus = 7, method = "kmeans", label = "niche_clusters")
+```
+
+After the niche clustering, the metadata is updated and observed later like below.
+
+```{r eval = FALSE, class.source="watch-out"}
head(Metadata(Xen_data))
```
@@ -313,14 +340,24 @@ head(Metadata(Xen_data))
## Visualization
+After niche clustering, each cell in the Xenium assay will be assigned a niche which is initially a number which indicates the ID of each particular niche. It is up to the user to annotate, filter and visualize these niches moving forward.
+
```{r eval = FALSE, class.source="watch-out"}
vrSpatialPlot(Xen_data, group.by = "niche_clusters", alpha = 1, legend.loc = "top")
```
+We use **vrHeatmapPlot** to investigate the abundance of each cell type across the niche clusters. You will need to have **ComplexHeatmap** package in your namespace. We see that niche cluster 1 include all invasive tumor subtypes (IT 1-3). We see this for two subtypes of in situ ductal carcinoma (DCIS 1,2) subtypes as well other than a third DCIS subcluster being within proximity to myoepithelial cells. Niche cluster 6 also shows regions within the breast cancer tissue where T cells and B cells are found together abundantly.
+
```{r eval = FALSE, class.source="watch-out"}
+# install packages if necessary
+if(!requireNamespace("ComplexHeatmap"))
+ BiocManager::install("ComplexHeatmap")
+
+# heatmap of niches
+library(ComplexHeatmap)
vrHeatmapPlot(Xen_data, features = vrFeatures(Xen_data), group.by = "niche_clusters")
```
diff --git a/docs/deconvolution.html b/docs/nicheclustering.html
similarity index 78%
rename from docs/deconvolution.html
rename to docs/nicheclustering.html
index fae6ddd..e6b629c 100644
--- a/docs/deconvolution.html
+++ b/docs/nicheclustering.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
@@ -462,19 +462,22 @@ Niche Clustering
Spot-based Niche Clustering
Spot-based spatial transcriptomic assays capture spatially-resolved
gene expression profiles that are somewhat closer to single cell
-resolution. However, each spot still include a few number of cells that
-are likely from a combination of cell types within the tissue of origin.
-RNA deconvolution is then incorporated to estimate the
-percentage/abundance of these cell types within each spot/ROI given a
-reference scRNAseq dataset.
-VoltRon includes wrapper commands for using popular RNA deconvolution
-methods such as RCTD
-(spot) and return estimated abundances as additional
-assays within each layer. These estimated percentages of cell types each
-spot could be incorporated to detect niches (i.e. small
-local microenvironments of cells) within the tissue. We can process cell
-type abundance assays and used them for clustering to detect these
+resolution. However, each spot still includes a few number of cells that
+are likely originated from few number of cell types, hence
+transcriptomic profile of each spot would likely include markers from
+multiple cell types. Here, RNA deconvolution can be
+incorporated to estimate the percentage/abundance of cell types for each
+spot. We use a scRNAseq dataset as a reference to computationally
+estimate the relative abundance of cell types across across the
+spots.
+VoltRon includes wrapper commands for using popular spot-level RNA
+deconvolution methods such as RCTD and
+return estimated abundances as additional feature sets within each
+layer. These estimated percentages of cell types for each spot could be
+incorporated to detect niches (i.e. small local
+microenvironments of cells) within the tissue. We can process cell type
+abundance assays and used them for clustering to detect these
niches.
@@ -489,9 +492,9 @@
Import Visium Data
AnatomicalEntity=brain,
Chemistry=v1
and
PipelineVersion=v1.1.0).
We will now import each of four samples separately and merge them
-into one VoltRon object. There are four sections in total given two
-serial anterior and serial posterior sections, hence we have two
-tissue blocks each having two layers.
+into one VoltRon object. There are four brain tissue sections in total
+given two serial anterior and serial posterior sections, hence we have
+
two tissue blocks each having two layers.
library(VoltRon)
Ant_Sec1 <- importVisium("Sagittal_Anterior/Section1/", sample_name = "Anterior1")
Ant_Sec2 <- importVisium("Sagittal_Anterior/Section2/", sample_name = "Anterior2")
@@ -522,24 +525,30 @@ Import scRNA data
We will now import the scRNA data for reference which can be
downloaded from here.
-Specifically, we will use a scRNA reference of Mouse cortical adult
-brain with 14,000 cells from the Allen Institute, generated with the
-SMART-Seq2 protocol. This scRNA data is also used by the Spatial Data
+Specifically, we will use a scRNA data of Mouse cortical adult brain
+with 14,000 cells, generated with the SMART-Seq2 protocol, from the
+Allen Institute. This scRNA data is also used by the Spatial Data
Analysis tutorial in Seurat
-website.
-Before deconvoluting Visium spots, we correct cell types labels and
-drop some cell types with extremely few number of cells (e.g. “CR”).
-library(Seurat)
+href="https://satijalab.org/seurat/articles/spatial_vignette.html#integration-with-single-cell-data">Seurat
+website.
+# install packages if necessary
+if(!requireNamespace("Seurat"))
+ install.packages("Seurat")
+if(!requireNamespace("dplyr"))
+ install.packages("dplyr")
+
+# import scRNA data
+library(Seurat)
allen_reference <- readRDS("allen_cortex.rds")
# process and reduce dimensionality
library(dplyr)
allen_reference <- SCTransform(allen_reference, ncells = 3000, verbose = FALSE) %>%
RunPCA(verbose = FALSE) %>%
- RunUMAP(dims = 1:30)
-
-# update labels and subset
+ RunUMAP(dims = 1:30)
+Before deconvoluting Visium spots, we correct cell types labels and
+drop some cell types with extremely few number of cells (e.g. “CR”).
+# update labels and subset
allen_reference$subclass <- gsub("L2/3 IT", "L23 IT", allen_reference$subclass)
allen_reference <- allen_reference[,colnames(allen_reference)[!allen_reference@meta.data$subclass %in% "CR"]]
@@ -554,15 +563,16 @@ Import scRNA data
Spot Deconvolution with RCTD
-
In order to integrate the scRNA data and the Visium data sets within
-the VoltRon objects, we will use RCTD algorithm which
-is accessible with the In order to integrate the scRNA data and the spatial data sets within
+the VoltRon object and estimate relative cell type abundances for each
+Visium spot, we will use RCTD algorithm which is
+accessible with the spacexr package.
-
if(requireNamespace("spacexr"))
+if(!requireNamespace("spacexr"))
devtools::install_github("dmcable/spacexr", build_vignettes = FALSE)
-After running getDeconvolution, an additional assay
-within the same layer with **_decon** postfix will be created for all
-layers with a Visium data.
+After running getDeconvolution, an additional
+feature set within the same Visium assay with name
+Decon will be created.
library(spacexr)
MBrain_Sec <- getDeconvolution(MBrain_Sec, sc.object = allen_reference, sc.cluster = "subclass", max_cores = 6)
MBrain_Sec
@@ -582,8 +592,8 @@ Spot Deconvolution with RCTD
[7] "L6 CT" "L6 IT" "L6b" "Lamp5" "Macrophage" "Meis2"
[13] "NP" "Oligo" "Peri" "Pvalb" "Serpinf1" "SMC"
[19] "Sncg" "Sst" "Vip" "VLMC"
-
These features (i.e. cell type abundances) can be visualized like all
-other feature types.
+
These features (i.e. cell type abundances) can be visualized like any
+other feature type.
vrSpatialFeaturePlot(MBrain_Sec, features = c("L4", "L5 PT", "Oligo", "Vip"),
crop = TRUE, ncol = 2, alpha = 1, keep.scale = "all")
@@ -593,24 +603,30 @@
Spot Deconvolution with RCTD
Clustering
Relative cell type abundances that are learned by RCTD and stored
within VoltRon can now be used to cluster spots. These groups or
-clusters of spots can also be realized as niches,
-regions within tissue sections that have a distinct cell type
-composition compared to the remaining parts of the tissue.
-
We will now normalize and process relative cell type abundances to
-learn/detect these niche clusters. We treat cell type abundances as niches.
+Here, as a definition, a niche is a region or a collection of regions
+within tissue that have a distinct cell type composition as opposed to
+the remaining parts of the tissue.
+
The cell type abundances (which adds up to one for each spot) can be
+normalized and processed like transcriptomic and proteomic profiles
+prior to clustering (i.e. niche clustering). We treat cell type
+abundances as compositional
data, hence we incorporate centred log ratio (CLR)
transformation for normalizing them.
-
MBrain_Sec <- normalizeData(MBrain_Sec, method = "CLR")
+
vrMainFeatureType(MBrain_Sec) <- "Decon"
+MBrain_Sec <- normalizeData(MBrain_Sec, method = "CLR")
The CLR normalized assay have only 25 features, each representing a
cell type from the single cell reference data. Hence, we can
directly calculate UMAP reductions from this feature
-set without relying on a prior dimensionality reduction such as
-PCA.
-
VoltRon is also capable of calculating the UMAP reduction from
-normalized data slots. Hence, we build a UMAP reduction from CLR data.
-However, UMAP will always be calculated from a PCA reduction by default
-(if a PCA embedding is found in the object).
+abundances since we dont have much number of features which
+necessitates dimensionality reduction such as PCA.
+
However, we may still need to reduce the dimensionality of this space
+with 25 features using UMAP for visualizing purposes. VoltRon is also
+capable of calculating the UMAP reduction from normalized data slots.
+Hence, we build a UMAP reduction from CLR data directly. However, UMAP
+will always be calculated from a PCA reduction by default (if a PCA
+embedding is found in the object).
MBrain_Sec <- getUMAP(MBrain_Sec, data.type = "norm")
vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "Sample")
@@ -628,20 +644,25 @@
Visualization
the clusters we have generated on UMAP embeddings.
# visualize
g1 <- vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "Sample")
-g2 <- vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "clusters", label = TRUE)
+g2 <- vrEmbeddingPlot(MBrain_Sec, embedding = "umap", group.by = "niche_clusters", label = TRUE)
g1 | g2
Mapping clusters on the spatial images and spots would show the niche
-structure across the tissues and layers.
-
vrSpatialPlot(MBrain_Sec, group.by = "clusters", crop = TRUE, alpha = 1)
+structure across all four tissue sections.
+
vrSpatialPlot(MBrain_Sec, group.by = "niche_clusters", crop = TRUE, alpha = 1)
We use vrHeatmapPlot to investigate relative cell
type abundances across these niche clusters. You will need to have
ComplexHeatmap package in your namespace.
-
library(ComplexHeatmap)
-vrHeatmapPlot(MBrain_Sec, features = vrFeatures(MBrain_Sec), group.by = "clusters",
+# install packages if necessary
+if(!requireNamespace("ComplexHeatmap"))
+ BiocManager::install("ComplexHeatmap")
+
+# heatmap of niches
+library(ComplexHeatmap)
+vrHeatmapPlot(MBrain_Sec, features = vrFeatures(MBrain_Sec), group.by = "niche_clusters",
show_row_names = T, show_heatmap_legend = T)
@@ -649,22 +670,30 @@ Visualization
Cell-based Niche Clustering
-
Unlike spot-based assays, spatial transcriptomics data in single cell
-resolution require building niche assays to detect clusters that are
-equivalent to the niches that each individual cell belongs to. We first
-detect the mixture of cells around a spatial neighborhood around each of
-these cells and use that a profile to perform clustering.
+
Similar to spot-based spatial omics assays, we can build and cluster
+niche associated to each cell for spatial transcriptomics datasets in
+single cell resolution. For this, we require building niche assays for
+the collections of cells where a niche of cell is defined as a region of
+sets of regions with distinct cell type population that each of these
+cells belong to.
+
Here, we dont require any scRNA reference dataset but we may first
+need to cluster and annotate cells in the RNA/transcriptome level
+profiles, and determine cell types. Then, we first detect the mixture of
+cell types within a spatial neighborhood around all cells and use that
+as a profile to perform clustering where these clusters will be
+associated with niches.
Import Xenium Data
For this, the data has to be already clustered (and annotated if
possible). We will use the cluster labels generated at the end of the
-Xenium analysis section of workflow from Cell/Spot Analysis. You can download the
-VoltRon object with clustered and annotated Xenium cells along with the
-Visium assay from Cell/Spot
+Analysis. You can download the VoltRon object with clustered and
+annotated Xenium cells along with the Visium assay from here.
-
Xen_data <- readRDS("VRBlock_data_clustered.rds")
-vrMainSpatial(Xen_data, assay = "Assay1") <- "main"
+Xen_data <- readRDS("VRBlock_data_clustered.rds")
+We will use all these 18 cell types used for annotating Xenium cells
+for detecting niches with distinct cellular type mixtures.
+vrMainSpatial(Xen_data, assay = "Assay1") <- "main"
vrMainSpatial(Xen_data, assay = "Assay3") <- "main"
vrSpatialPlot(Xen_data, group.by = "CellType", pt.size = 0.13, background.color = "black",
legend.loc = "top", n.tile = 500)
@@ -673,11 +702,11 @@ Import Xenium Data
Creating Niche Assay
-
Here, in order to calculate niche profiles for each cell, we have to
-first build spatial neighborhoods around cells and capture the local
-cell type mixtures. Using getSpatialNeighbors, we build
-a spatial neighborhood graph to connect all cells to other cells within
-at most 15 distance apart
+
For calculating niche profiles for each cell, we have to first build
+spatial neighborhoods around cells and capture the local cell type
+mixtures. Using getSpatialNeighbors, we build a spatial
+neighborhood graph to connect all cells to other cells within at most 15
+distance apart.
Xen_data <- getSpatialNeighbors(Xen_data, radius = 15, method = "radius")
vrGraphNames(Xen_data)
[1] "radius"
@@ -704,9 +733,13 @@
Clustering
Xen_data <- normalizeData(Xen_data, method = "CLR")
Default clustering functions could be used to analyze the normalized
niche profiles of cells to detect niches associated with each cell.
-However, we use K-means algorithm to perform the niche clustering.
-
Xen_data <- getClusters(Xen_data, nclus = 7, method = "kmeans", label = "niche_clusters")
-head(Metadata(Xen_data))
+However, we use K-means algorithm to perform the niche clustering. For
+this exercise, we pick an estimate of 7 clusters which will be the
+number of niche clusters we get.
+
Xen_data <- getClusters(Xen_data, nclus = 7, method = "kmeans", label = "niche_clusters")
+
After the niche clustering, the metadata is updated and observed
+later like below.
+
head(Metadata(Xen_data))
id Count assay_id Assay Layer Sample CellType niche_clusters
1_Assay1 1_Assay1 28 Assay1 Xenium Section1 10XBlock DCIS_1 2
@@ -720,10 +753,29 @@ Clustering
Visualization
+
After niche clustering, each cell in the Xenium assay will be
+assigned a niche which is initially a number which indicates the ID of
+each particular niche. It is up to the user to annotate, filter and
+visualize these niches moving forward.
vrSpatialPlot(Xen_data, group.by = "niche_clusters", alpha = 1, legend.loc = "top")
+
We use vrHeatmapPlot to investigate the abundance of
+each cell type across the niche clusters. You will need to have
+ComplexHeatmap package in your namespace. We see that
+niche cluster 1 include all invasive tumor subtypes (IT 1-3). We see
+this for two subtypes of in situ ductal carcinoma (DCIS 1,2) subtypes as
+well other than a third DCIS subcluster being within proximity to
+myoepithelial cells. Niche cluster 6 also shows regions within the
+breast cancer tissue where T cells and B cells are found together
+abundantly.
-
vrHeatmapPlot(Xen_data, features = vrFeatures(Xen_data), group.by = "niche_clusters")
+
# install packages if necessary
+if(!requireNamespace("ComplexHeatmap"))
+ BiocManager::install("ComplexHeatmap")
+
+# heatmap of niches
+library(ComplexHeatmap)
+vrHeatmapPlot(Xen_data, features = vrFeatures(Xen_data), group.by = "niche_clusters")
diff --git a/docs/ondisk.html b/docs/ondisk.html
index 85aa4c6..109d087 100644
--- a/docs/ondisk.html
+++ b/docs/ondisk.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/pixelanalysis.html b/docs/pixelanalysis.html
index b057698..eeadf7d 100644
--- a/docs/pixelanalysis.html
+++ b/docs/pixelanalysis.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/registration.html b/docs/registration.html
index 7f43bfa..d9be384 100644
--- a/docs/registration.html
+++ b/docs/registration.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/roianalysis.html b/docs/roianalysis.html
index b1af33b..c8d9bb4 100644
--- a/docs/roianalysis.html
+++ b/docs/roianalysis.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/spotanalysis.html b/docs/spotanalysis.html
index 31aa299..aa1fe88 100644
--- a/docs/spotanalysis.html
+++ b/docs/spotanalysis.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
diff --git a/docs/tutorials.Rmd b/docs/tutorials.Rmd
index 5734b1e..1e39899 100644
--- a/docs/tutorials.Rmd
+++ b/docs/tutorials.Rmd
@@ -72,7 +72,7 @@ In addition, VoltRon provides a number of spatially aware data analysis methods
Integrating data modalities within or across tissue sections
-
+ |
Niche Clustering
Clustering based on ROI/spot deconvolution and Spatial Neighborhood
diff --git a/docs/tutorials.html b/docs/tutorials.html
index f63198e..d4401ef 100644
--- a/docs/tutorials.html
+++ b/docs/tutorials.html
@@ -264,7 +264,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
@@ -444,7 +444,7 @@ Spatially Aware Data Integration and Analysis
Integrating data modalities within or across tissue sections
|
-
+ |
Niche Clustering
diff --git a/docs/voltronobjects.html b/docs/voltronobjects.html
index 5f4c2b5..ed83e67 100644
--- a/docs/voltronobjects.html
+++ b/docs/voltronobjects.html
@@ -346,7 +346,7 @@
Multi-omic Integration
- Niche Clustering
+ Niche Clustering
|