Integration of the non-multiome data #534

kinali · 2021-03-25T20:03:59Z

kinali
Mar 25, 2021

Hi all,
I have separate (non-multiome) 10X scRNA-seq and scATAC-seq data and I am trying to integrate them. I have transferred the labels and I can see integrated UMAP. However, when I was trying to use Coverageplot with my integrated data I have realized that I don't have Chromatinassay in the integrated object. How can I transfer Chromatinassay from non-integrated data to integrated data because when I check there is Chromatinassay in scatac data. Thank you in advance!

Here is the code:

s1_rna <- readRDS(file = "./s1_rna_wCells.rds")
#each row represents region of the genome (a peak) in peak/cell matrix
#which is predicted to represent a region of open chromatin:
counts <- Read10X_h5(filename = "./atac_out/outs/filtered_peak_bc_matrix.h5")
metadata <- read.csv(file = "./atac_out/outs/singlecell.csv",
                     header = TRUE,
                     row.names = 1)
#fragment file represents a full list of unique fragments across all single cells
#it contains all fragments associated with each single cell:
chrom_assay <- CreateChromatinAssay(
  counts = counts,
  sep = c(":", "-"),
  genome = 'hg38',
  fragments = './atac_out/outs/fragments.tsv.gz',
  min.cells = 10,
  min.features = 200
)

s1 <- CreateSeuratObject(counts = chrom_assay, assay = "peaks", meta.data = metadata, project = "s1")
#
# extract gene annotations for hg38 from EnsDb 
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)

# change to UCSC style since the data was mapped to hg38
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "hg38"

# add the gene information to the object
Annotation(s1) <- annotations

#Compute QC Metrics:
s1 <- NucleosomeSignal(object = s1)

# compute TSS enrichment score per cell
#ATAC-seq targeting score based on the ratio of fragments centered at the TSS to fragments in TSS-flanking regions
#Poor ATAC-seq experiments typically will have a low TSS enrichment score.
s1 <- TSSEnrichment(object = s1, fast = FALSE)
#
# add blacklist ratio and fraction of reads in peaks
s1$pct_reads_in_peaks <- s1$peak_region_fragments / s1$passed_filters * 100
s1$blacklist_ratio <- s1$blacklist_region_fragments / s1$peak_region_fragments
#
s1$high.tss <- ifelse(s1$TSS.enrichment > 2, 'High', 'Low')
#Nucleosomal banding pattern
s1$nucleosome_group <- ifelse(s1$nucleosome_signal > 2, 'NS > 2', 'NS < 2')
#
s1 <- subset(x = s1,
             subset = peak_region_fragments > 900 &
               peak_region_fragments < 35000 &
               nCount_peaks < 70000 &
               pct_reads_in_peaks > 15 &
               blacklist_ratio < 0.005 &
               nucleosome_signal < 2 &
               TSS.enrichment > 2 )
###############################################
#Normalization and linear dimensional reduction:
#1)TF-IDF normalization 
#first step normalization across cells to correct for differences in cellular sequencing depth
#second step across peaks
s1 <- RunTFIDF(s1)
#2)Feature selection (top peaks for dimensional reduction)
s1 <- FindTopFeatures(s1,min.cutoff = 'q0')
#3)Dimension reduction by running SVD(singular value decomposition) on the TD-IDF matrix using the features(peaks)
s1 <- RunSVD(s1)
#DepthCor(s1,n = 50) #decide dimension in umap using this plot, remove 1st component it shows technical variation

#Non-linear dimension reduction and clustering
s1 <- RunUMAP(s1, reduction = 'lsi', dims = 2:40)
s1 <- FindNeighbors(s1, reduction = 'lsi', dims = 2:40)
s1 <- FindClusters(s1, algorithm = 3)
#DimPlot(s1, label = TRUE) 

#Create a gene activity matrix
gene.activities <- GeneActivity(s1)

# add the gene activity matrix to the Seurat object as a new assay 
s1[['ACT']] <- CreateAssayObject(counts = gene.activities)
DefaultAssay(s1) <- "ACT"
s1 <- FindVariableFeatures(s1)
#then normalize it
s1 <- NormalizeData(
  object = s1,
  assay = 'ACT',
  normalization.method = 'LogNormalize')
#
s1 <- ScaleData(s1)
#Integrating with scRNA-seq data
#use pre-processed scRNA-seq dataset s1_rna
transfer.anchors <- FindTransferAnchors(
  reference = s1_rna,
  query = s1,
  features = VariableFeatures(object = s1_rna),
  reference.assay = "RNA",
  query.assay = "ACT",
  reduction = 'cca'
)

predicted.labels <- TransferData(
  anchorset = transfer.anchors,
  refdata = s1_rna$cell_types,
  weight.reduction = s1[['lsi']],
  dims = 2:40
)
s1 <- AddMetaData(s1,metadata = predicted.labels)
#add transfered labels to atac seq data:
s1_atac_filtered <- subset(s1, subset = prediction.score.max > 0.5)
s1_atac_filtered$predicted.id <- factor(s1_atac_filtered$predicted.id, levels = levels(s1_rna))  # to make the colors match

#save atac with transferred anchors
saveRDS(s1_atac_filtered,file = "./s1_atac_wCells.rds")

################################Co-embedding##############################
genes.use <- VariableFeatures(s1_rna)
refdata <- GetAssayData(s1_rna, assay = "RNA", slot = "data")[genes.use, ]
imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata,dims = 2:40, weight.reduction = s1[["lsi"]])
# this line adds the imputed data matrix to the atac object
s1[["RNA"]] <- imputation
coembed <- merge(x = s1_rna, y = s1)

# run PCA and UMAP on this combined object, to visualize the co-embedding of both datasets
coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
coembed <- RunUMAP(coembed, dims = 1:40)
coembed$cell_types <- ifelse(!is.na(coembed$cell_types), coembed$cell_types, coembed$predicted.id)
#
saveRDS(coembed,file = "./coembed.rds")```

Answered by timoast

Apr 6, 2021

Hi @kinali, I think you just need to change the default assay here back to "peaks" after doing the transfer

View full answer

timoast · 2021-04-06T21:36:16Z

timoast
Apr 6, 2021
Maintainer

Hi @kinali, I think you just need to change the default assay here back to "peaks" after doing the transfer

0 replies

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Integration of the non-multiome data #534

{{title}}

Replies: 1 comment

{{title}}

Select a reply

Integration of the non-multiome data #534

kinali Mar 25, 2021

Replies: 1 comment

timoast Apr 6, 2021 Maintainer

kinali
Mar 25, 2021

timoast
Apr 6, 2021
Maintainer