-
Hi all, Here is the code: s1_rna <- readRDS(file = "./s1_rna_wCells.rds")
#each row represents region of the genome (a peak) in peak/cell matrix
#which is predicted to represent a region of open chromatin:
counts <- Read10X_h5(filename = "./atac_out/outs/filtered_peak_bc_matrix.h5")
metadata <- read.csv(file = "./atac_out/outs/singlecell.csv",
header = TRUE,
row.names = 1)
#fragment file represents a full list of unique fragments across all single cells
#it contains all fragments associated with each single cell:
chrom_assay <- CreateChromatinAssay(
counts = counts,
sep = c(":", "-"),
genome = 'hg38',
fragments = './atac_out/outs/fragments.tsv.gz',
min.cells = 10,
min.features = 200
)
s1 <- CreateSeuratObject(counts = chrom_assay, assay = "peaks", meta.data = metadata, project = "s1")
#
# extract gene annotations for hg38 from EnsDb
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
# change to UCSC style since the data was mapped to hg38
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "hg38"
# add the gene information to the object
Annotation(s1) <- annotations
#Compute QC Metrics:
s1 <- NucleosomeSignal(object = s1)
# compute TSS enrichment score per cell
#ATAC-seq targeting score based on the ratio of fragments centered at the TSS to fragments in TSS-flanking regions
#Poor ATAC-seq experiments typically will have a low TSS enrichment score.
s1 <- TSSEnrichment(object = s1, fast = FALSE)
#
# add blacklist ratio and fraction of reads in peaks
s1$pct_reads_in_peaks <- s1$peak_region_fragments / s1$passed_filters * 100
s1$blacklist_ratio <- s1$blacklist_region_fragments / s1$peak_region_fragments
#
s1$high.tss <- ifelse(s1$TSS.enrichment > 2, 'High', 'Low')
#Nucleosomal banding pattern
s1$nucleosome_group <- ifelse(s1$nucleosome_signal > 2, 'NS > 2', 'NS < 2')
#
s1 <- subset(x = s1,
subset = peak_region_fragments > 900 &
peak_region_fragments < 35000 &
nCount_peaks < 70000 &
pct_reads_in_peaks > 15 &
blacklist_ratio < 0.005 &
nucleosome_signal < 2 &
TSS.enrichment > 2 )
###############################################
#Normalization and linear dimensional reduction:
#1)TF-IDF normalization
#first step normalization across cells to correct for differences in cellular sequencing depth
#second step across peaks
s1 <- RunTFIDF(s1)
#2)Feature selection (top peaks for dimensional reduction)
s1 <- FindTopFeatures(s1,min.cutoff = 'q0')
#3)Dimension reduction by running SVD(singular value decomposition) on the TD-IDF matrix using the features(peaks)
s1 <- RunSVD(s1)
#DepthCor(s1,n = 50) #decide dimension in umap using this plot, remove 1st component it shows technical variation
#Non-linear dimension reduction and clustering
s1 <- RunUMAP(s1, reduction = 'lsi', dims = 2:40)
s1 <- FindNeighbors(s1, reduction = 'lsi', dims = 2:40)
s1 <- FindClusters(s1, algorithm = 3)
#DimPlot(s1, label = TRUE)
#Create a gene activity matrix
gene.activities <- GeneActivity(s1)
# add the gene activity matrix to the Seurat object as a new assay
s1[['ACT']] <- CreateAssayObject(counts = gene.activities)
DefaultAssay(s1) <- "ACT"
s1 <- FindVariableFeatures(s1)
#then normalize it
s1 <- NormalizeData(
object = s1,
assay = 'ACT',
normalization.method = 'LogNormalize')
#
s1 <- ScaleData(s1)
#Integrating with scRNA-seq data
#use pre-processed scRNA-seq dataset s1_rna
transfer.anchors <- FindTransferAnchors(
reference = s1_rna,
query = s1,
features = VariableFeatures(object = s1_rna),
reference.assay = "RNA",
query.assay = "ACT",
reduction = 'cca'
)
predicted.labels <- TransferData(
anchorset = transfer.anchors,
refdata = s1_rna$cell_types,
weight.reduction = s1[['lsi']],
dims = 2:40
)
s1 <- AddMetaData(s1,metadata = predicted.labels)
#add transfered labels to atac seq data:
s1_atac_filtered <- subset(s1, subset = prediction.score.max > 0.5)
s1_atac_filtered$predicted.id <- factor(s1_atac_filtered$predicted.id, levels = levels(s1_rna)) # to make the colors match
#save atac with transferred anchors
saveRDS(s1_atac_filtered,file = "./s1_atac_wCells.rds")
################################Co-embedding##############################
genes.use <- VariableFeatures(s1_rna)
refdata <- GetAssayData(s1_rna, assay = "RNA", slot = "data")[genes.use, ]
imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata,dims = 2:40, weight.reduction = s1[["lsi"]])
# this line adds the imputed data matrix to the atac object
s1[["RNA"]] <- imputation
coembed <- merge(x = s1_rna, y = s1)
# run PCA and UMAP on this combined object, to visualize the co-embedding of both datasets
coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
coembed <- RunUMAP(coembed, dims = 1:40)
coembed$cell_types <- ifelse(!is.na(coembed$cell_types), coembed$cell_types, coembed$predicted.id)
#
saveRDS(coembed,file = "./coembed.rds")``` |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
Hi @kinali, I think you just need to change the default assay here back to "peaks" after doing the transfer |
Beta Was this translation helpful? Give feedback.
Hi @kinali, I think you just need to change the default assay here back to "peaks" after doing the transfer