p_load(tidyverse, DESeq2, pheatmap, ggrepel, ggsci, reshape2,
GSVA, GSEABase, clusterProfiler, aPEAR)
dir.create('./GSEA_analysis')
color <- c(pal_npg(alpha = 1)(10),pal_nejm(alpha=1)(8))
### Construct the DESeq2 object
countData <- read.csv('count.csv', row.names = 1)
condition <- factor(gsub("_[0-9]$", "", colnames(countData)))
colData <- data.frame(row.names = colnames(countData), condition)
dds <- DESeqDataSetFromMatrix(countData, colData, design=~condition)
### Vst() for PCA analysis
rld <- vst(dds, blind=T)
p <- plotPCA(rld)
pcaData <- p$data
median <- pcaData %>% group_by(group) %>% summarize(X1=median(PC1), X2 = median(PC2))
xlab_text <- p$labels$x
ylab_text <- p$labels$y
p1 <- ggplot(pcaData, aes(PC1, PC2, color = group)) +
geom_point(size = 3) +
scale_color_manual(values = color[1:length(table(pcaData$group))]) +
geom_text_repel(aes(X1, X2, label = group), data = median, family="serif") +
xlab(xlab_text) + ylab(ylab_text) +
ggtitle("Sample_PCA_analysis") +
theme_classic() +
theme(legend.position = "none", text=element_text(family = "serif"))
ggsave("./GSEA_analysis/1.sample_PCA.pdf", p1, height=4, width=6)
### Correlation analysis on samples
rld_mat <- assay(rld)
rld_cor <- cor(rld_mat)
rld_cor_gg <- melt(rld_cor)
rld_cor_gg$value <- scale(rld_cor_gg$value)
p2 <- ggplot(rld_cor_gg, aes(x=Var1,y=Var2, fill=value)) +
geom_raster() +
scale_fill_gradient2(low='royalblue2',high='firebrick2',mid="grey95") +
ggtitle("Sample_correlation") +
xlab(NULL) + ylab(NULL) +
theme_classic() +
theme(legend.position = "none", text=element_text(family = "serif"))
ggsave("./GSEA_analysis/2.sample_correlation.pdf", p2, height=4, width=6)
### Construct the DESeq2 object and perform the analysis
countData <- read.csv('count.csv', row.names = 1)
### countData <- filter_all(countData, any_vars(. > 10))
condition <- factor(c(rep("ctrl",(ncol(countData)/2)),rep("treated",(ncol(countData)/2))))
colData <- data.frame(row.names = colnames(countData), condition)
dds <- DESeqDataSetFromMatrix(countData, colData, design=~condition)
dds <- DESeq(dds)
saveRDS(dds, './GSEA_analysis/3.DESeq2_result.rds')
res <- results(dds)
resdata <- as.data.frame(res)
### To avoid the padj value goes too low
resdata$padj[resdata$padj<1e-200] <- 1e-200
### Separate the change into "UP", "DN", and "Stable"
resdata$change = ifelse(resdata$padj < 0.05 & abs(resdata$log2FoldChange) >= 2,
ifelse(resdata$log2FoldChange >= 2, 'Up', 'Dn'), 'Stable')
resdata <- na.omit(resdata)
write.csv(resdata, './GSEA_analysis/3.DESeq2_result.csv')
### Prepare the text information for labeling
resdata_sig <- subset(resdata, (padj < 0.05 & abs(log2FoldChange) >= 2))
resdata_sig <- resdata_sig %>% top_n(20, -log10(padj))
if(any(resdata$change == "Dn")){
color_panel <- c('royalblue2', "grey95", 'firebrick2')
} else {
color_panel <- c("grey95", 'firebrick2')
}
### Draw the graph
p3 <- ggplot(resdata, aes(log2FoldChange, -log(padj, 10), colour = change)) +
geom_point(alpha=0.8, size = 2) +
scale_color_manual(values=color_panel) +
xlab(expression("log"[2]*"FC")) + ylab(expression("-log"[10]*"FDR")) +
geom_vline(xintercept=c(-2,2),lty=4,col="lightgrey",lwd=0.8) +
geom_hline(yintercept = -log10(0.05),lty=4,col="lightgrey",lwd=0.8) +
geom_label_repel(aes(label = rownames(resdata_sig)), data = resdata_sig, family="serif",
max.overlaps = 100, max.iter = 1e9, force = 1e2) +
ggtitle("Significant_genes (padj < 0.05 & log2FC > 2)",
subtitle = paste0("Down: ",count(resdata$change == "Dn"),", ",
"Up: ",count(resdata$change == "Up"),sep="")) +
theme_classic() +
theme(legend.position = "none", text=element_text(family="serif"))
ggsave("./GSEA_analysis/3.volcano.pdf", p3, height=4, width=6)
### Prepare the DESeq2 result for GSEA analysis
dds <- readRDS('./GSEA_analysis/3.DESeq2_result.rds')
dds <- data.frame(results(dds))
log2FC_matrix <- na.omit(dds)
log2FC_matrix <- log2FC_matrix[log2FC_matrix$baseMean > 1,]
GSEA <- log2FC_matrix$stat
names(GSEA) <- rownames(log2FC_matrix)
GSEA <- sort(GSEA, decreasing=T)
### Load the geneset information based on human or mouse
if('GAPDH' %in% names(GSEA)){
df <- readRDS("~/Desktop/Saved_index_for_R/GSEA_genesets_hs_2024.rds")
} else {
df <- readRDS("~/Desktop/Saved_index_for_R/GSEA_genesets_mm_2024.rds")
}
### Perform the GSEA analysis
GSEA_ALL <- GSEA(GSEA, TERM2GENE=df, eps=0)
GSEA_matrix <- GSEA_ALL@result
### Save result
saveRDS(GSEA_ALL, './GSEA_analysis/4.GSEA_result.rds')
GSEA_matrix <- GSEA_matrix %>% select('ID', 'NES', 'p.adjust') %>%
tibble() %>% arrange(NES) %>% mutate(logP = -log10(p.adjust)) %>%
mutate(color = if_else(NES > 2, 'royalblue2', if_else(NES < -2, 'firebrick2', 'grey95')))
GSEA_matrix$ID <- gsub('_',' ',GSEA_matrix$ID)
GSEA_top <- rbind(GSEA_matrix %>% top_n(-10, NES), GSEA_matrix %>% top_n(10, NES))
p4 <- ggplot(GSEA_matrix, aes(x = NES, y = logP, color = color)) +
geom_point() + theme_classic() +
geom_label_repel(aes(label = ID), GSEA_top, size = 2.5, direction="both") +
scale_color_manual(values = c('royalblue2','grey95','firebrick2')) +
xlab('NES of enrichment') + ylab(expression("Log"[10]*"(p.adj)")) +
geom_vline(xintercept=c(-2, 2),lty=4,col="grey",lwd=0.5) +
theme(legend.position = 'none') +
ggtitle("GSEA_results_on_genesets")
ggsave("./GSEA_analysis/4_GSEA.pdf", p4, height=6, width=12)
### Perform filtering of the GSEA result
# GSEA_matrix$Description <- gsub("WP_","",GSEA_matrix$Description)
# GSEA_matrix$Description <- gsub("REACTOME_","",GSEA_matrix$Description)
# GSEA_matrix$Description <- gsub("KEGG_","",GSEA_matrix$Description)
# GSEA_matrix$Description <- gsub("GOMF_","",GSEA_matrix$Description)
# GSEA_matrix$Description <- gsub("GOCC_","",GSEA_matrix$Description)
# GSEA_matrix$Description <- gsub("GOBP_","",GSEA_matrix$Description)
GSEA_matrix <- GSEA_ALL@result
GSEA_matrix$Description <- gsub("_", " ", GSEA_matrix$ID)
# GSEA_matrix$Description <- tolower(GSEA_matrix$Description)
GSEA_matrix <- rbind(GSEA_matrix %>% top_n(100,NES),
GSEA_matrix %>% top_n(-100,NES))
GSEA_matrix <- GSEA_matrix[unique(GSEA_matrix$ID),]
### Use the aPEAR package
set.seed(123)
data <- findPathClusters(GSEA_matrix, cluster = 'hier', minClusterSize = 3)
p5 <- plotPathClusters(GSEA_matrix, data$sim, data$clusters,
repelLabels = TRUE, drawEllipses = TRUE)
ggsave("./GSEA_analysis/5.GSEA_network.pdf", p5, height=8, width=12)
my_RNAseq_processure
?著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事?!?“怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 国际黄金行情走势分析: 周一(3月8日)亚洲时段,现货黄金延续上周五纽约时段涨势,最高触及1714美元附近,目前交...
- 今天是小姐姐们的节日。 我老司机一次搞大以强悍忠诚的“女士用品”身份,在这里祝所有小姐姐们日进斗精。 如何用商业电...