-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGO_enrichment_analysis.R
217 lines (162 loc) · 7.25 KB
/
GO_enrichment_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#install.packages("BiocManager")
#BiocManager::install("clusterProfiler")
library(clusterProfiler)
# Load BiocManager and install AnnotationHub
#BiocManager::install("AnnotationHub")
# Load AnnotationHub
library(AnnotationHub)
# Install biomaRt using BiocManager
#BiocManager::install("biomaRt")
#BiocManager::install("AnnotationForge")
library(AnnotationForge)
### Make custom bacillus subtilis orgDb project
makeOrgPackageFromNCBI(version = "0.1",
author = "AC Lawaetz",
maintainer = "AC Lawaetz <[email protected]",
outputDir = "/Users/andreas/Bacillus/Bioinformatics/ClusterProfiler_orgDB",
tax_id = "224308",
genus = "Bacillus",
species = "subtilis")
### Make custom GO terms from SubtiWiki to input ClusterProfiler
# Load necessary libraries
library(clusterProfiler)
library(dplyr)
# Load your custom gene-to-category data
gene2category <- read.csv("/Users/andreas/Bacillus/Bioinformatics/RNAseq_jag_khpa_kre/X204SC22071444-Z01-F001/Analysis/Gene_categories/GeneID_Category_All.csv")
colnames(gene2category) <- c("Category", "GeneID")
# Create a TERM2GENE data frame
TERM2GENE <- gene2category %>%
rename(term = Category, gene = GeneID)
# Create a TERM2NAME data frame (optional, but useful for readability)
TERM2NAME <- gene2category %>%
select(Category) %>%
distinct() %>%
mutate(name = Category) %>%
rename(term = Category)
# Specify the path to your gene list file
gene_list_file <- "/Users/andreas/Bacillus/Bioinformatics/RNAseq_jag_khpA/sig_DEG/pyRAP_Edinburgh_jag_all.csv_sig_genes_BSU_Numbers.txt"
# Read the gene list into R
genes_of_interest <- read.table(gene_list_file, header = FALSE, stringsAsFactors = FALSE)
genes_of_interest <- as.character(genes_of_interest$V1)
# Perform GO-like enrichment analysis
go_enrichment <- enricher(genes_of_interest, TERM2GENE = TERM2GENE, TERM2NAME = TERM2NAME)
# View results
print(go_enrichment)
# Dot plot of the enrichment results
dotplot(go_enrichment, showCategory = 20) +
labs(title = "GO Enrichment Analysis Jag") +
theme(plot.title = element_text(hjust = 0.5))
# Print the current cache directory
current_cache <- getAnnotationHubOption("CACHE")
print(current_cache)
# Load your custom OrgDb package
install.packages("/Users/andreas/Bacillus/Bioinformatics/ClusterProfiler_orgDB/org.Bsubtilis.eg.db", repos = NULL, type = "source")
library(clusterProfiler)
library(org.Bsubtilis.eg.db)
library(ggplot2)
# Specify the path to your gene list file
gene_list_file <- "/Users/andreas/Bacillus/Bioinformatics/RNAseq_jag_khpA/sig_DEG/pyRAP_Edinburgh_jag_all.csv_sig_genes.txt"
gene_list_BSUfile <- "/Users/andreas/Bacillus/Bioinformatics/RNAseq_jag_khpA/sig_DEG/pyRAP_Edinburgh_jag_all.csv_sig_genes_BSUNumbers_no_underscore.txt"
# Read the gene list into R
gene_list <- read.table(gene_list_file, header = FALSE, stringsAsFactors = FALSE)
gene_listBSU <- read.table(gene_list_BSUfile, header = FALSE, stringsAsFactors = FALSE)
# Convert to a character vector
gene_list <- gene_list$V1
gene_listBSU <- gene_listBSU$V1
# Check supported key types
#keytypes(org.Bsubtilis.eg.db)
# Perform GO enrichment analysis
ego <- enrichGO(gene = gene_list,
OrgDb = org.Bsubtilis.eg.db,
keyType = "SYMBOL", # Adjust the keyType based on your gene identifiers
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# View the results
head(ego)
# Create a dot plot
dotplot(ego) + ggtitle("GO Enrichment Analysis")
# Load required libraries
library(clusterProfiler)
library(org.Bsubtilis.eg.db) # Replace 'Xx' with the appropriate organism code
# Perform KEGG pathway enrichment analysis
kegg_enrichment <- enrichKEGG(gene = gene_listBSU,
organism = "bsu", # Replace 'Xx' with the appropriate organism code
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# View the results
head(kegg_enrichment)
# Create a dot plot
dotplot(kegg_enrichment) + ggtitle("KEGG Enrichment Analysis")
# Extract genes used in the enrichment analysis
included_genes <- kegg_enrichment@result$geneID
# Convert the included genes from a single string to a vector
included_genes_vector <- unlist(strsplit(included_genes, "/"))
# Identify the excluded genes
excluded_genes <- setdiff(gene_list, included_genes_vector)
# Display excluded genes
cat("Excluded genes:\n")
print(excluded_genes)
# Test if "dps" is in the excluded genes list
is_dps_excluded <- "dps" %in% excluded_genes
# Print the result
if (is_dps_excluded) {
cat("The gene 'dps' is in the excluded genes list.\n")
} else {
cat("The gene 'dps' is not in the excluded genes list.\n")
}
### Examine orgDb object
library(AnnotationDbi)
# 1. List available keys
available_keys <- keytypes(org.Bsubtilis.eg.db)
print(available_keys)
# 2. Extract gene annotations
# For example, you can extract gene symbols and names
genes <- AnnotationDbi::select(org.Bsubtilis.eg.db, keys = keys(org.Bsubtilis.eg.db), columns = c("SYMBOL", "GENENAME", 'GO', 'ONTOLOGY'), keytype = "ENTREZID")
# Print the first few rows of the gene annotations
print(head(genes))
# Save the gene annotations to a CSV file
write.csv(genes, file = "/Users/andreas/Bacillus/Bioinformatics/RNAseq_spoVG_LB_OD3/Analysis/gene_annotations_orgDb.csv", row.names = FALSE)
### Investigate why the ont='ALL' doesn't include 'dps' when it is included in ont='MF'
#BiocManager::install("GO.db")
# Assuming "dps" is a symbol, get the ENTREZID for it
dps_entrez <- AnnotationDbi::select(org.Bsubtilis.eg.db, keys = "dps", keytype = "SYMBOL", columns = "ENTREZID")
# Get GO annotations for "dps"
dps_go_annotations <- AnnotationDbi::select(org.Bsubtilis.eg.db, keys = dps_entrez$ENTREZID, columns = c("GO", "ONTOLOGY"), keytype = "ENTREZID")
print(dps_go_annotations)
# Perform enrichment analysis for 'MF'
ego_mf <- enrichGO(gene = gene_list,
OrgDb = org.Bsubtilis.eg.db,
keyType = "SYMBOL",
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# Perform enrichment analysis for 'BP'
ego_bp <- enrichGO(gene = gene_list,
OrgDb = org.Bsubtilis.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# Perform enrichment analysis for 'ALL'
ego_all <- enrichGO(gene = gene_list,
OrgDb = org.Bsubtilis.eg.db,
keyType = "SYMBOL",
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
# Compare results
print(head(ego_mf))
print(head(ego_bp))
print(head(ego_all))
# Check if "dps" is included in the results
included_genes_mf <- unique(unlist(strsplit(ego_mf@result$geneID, "/")))
included_genes_bp <- unique(unlist(strsplit(ego_bp@result$geneID, "/")))
included_genes_all <- unique(unlist(strsplit(ego_all@result$geneID, "/")))
"dps" %in% included_genes_mf
"dps" %in% included_genes_bp
"dps" %in% included_genes_all