2025 - 全网最牛的生物信息学分析 - 一键式生成DIFF_GSEA_WGCNA_GO_KEGG_DO
先给你炫一下图
直接上代码
setwd("/Users/wangyang/Desktop/BCBM/02DIFF_GSEA_WGCNA")
#引用包
library(ggplot2)
library(limma)
library(pheatmap)
library(ggsci)
library(dplyr)
lapply(c('clusterProfiler','enrichplot','patchwork'), function(x) {
library(x, character.only = T)})
library(org.Hs.eg.db)
library(patchwork)
library(WGCNA)
library(GSEABase)
#参数设置
GSE="GSExxxxxx" #表达矩阵文件名称,不用后缀
C="C" #正常控制组名称
P="P" #疾病实验组的名称
Ccol="blue" #热图注释条正常组颜色
Pcol="red" #热图注释条疾病组颜色
rt=read.table(paste0(GSE,".txt"),sep="\t",header=T,check.names=F)
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
rt=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
rt=avereps(rt)
######################################################################################################################1.数据准备
#分组
sample=read.table("sample.txt",sep="\t",header=F,check.names=F,row.names = 1)
rt=rt[,rownames(sample)]
afcon=sum(sample[,1]==C)
#判断原始数据是否去了log
max(rt)
if(max(rt)>50) rt=log2(rt+1) #rt最大值大于30则取log
#使用normalizeBetweenArrays进行矫正,矫正后赋值为rt1
rt1=normalizeBetweenArrays(as.matrix(rt))
#未标准化,mar调整画布范围下左上右,自行调整哈
cols=rainbow(ncol(rt)) ###针对24个样本,设置颜色,整体呈现彩虹色
pdf(file = "1.raw.pdf",width=5,height = 4)
par(cex = 0.7,mar=c(8,8,8,8))
if(ncol(rt)>40) par(cex = 0.5,mar=c(8,8,8,8)) ###设置字体大小
boxplot(rt,las=2,col =cols ) ###绘图
dev.off()
#标准化
cols=rainbow(ncol(rt1)) ###针对24个样本,设置颜色,整体呈现彩虹色
pdf(file = "1.nor.pdf",width=5,height = 4.5)
par(cex = 0.5,mar=c(8,8,8,8))
if(ncol(rt1)>40) par(cex = 0.5,mar=c(8,8,8,8)) ###设置字体大小
boxplot(rt1,las=2,col =cols ) ###绘图
dev.off()
#保存标准化后结果
rt2=rbind(ID=colnames(rt1),rt1)
write.table(rt2,file=paste0("1.","norexp_",GSE,".txt"),sep="\t",quote=F,col.names = F)
#保留原始结果
rt3=rbind(ID=colnames(rt),rt)
write.table(rt3,file=paste0("1.","rawexp_",GSE,".txt"),sep="\t",quote=F,col.names = F)
#######################################################################################################################2.差异分析
data=rt1
#data=rt
conData=data[,as.vector(colnames(data)[1:afcon])]
aftreat=afcon+1
treatData=data[,as.vector(colnames(data)[aftreat:ncol(data)])]
rt=cbind(conData,treatData)
conNum=ncol(conData)
treatNum=ncol(treatData)
#limma差异标准流程
Type=c(rep("con",conNum),rep("treat",treatNum))
design <- model.matrix(~0+factor(Type))
colnames(design) <- c("con","treat")
fit <- lmFit(rt,design)
cont.matrix<-makeContrasts(treat-con,levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
Diff=topTable(fit2,adjust='fdr',number=length(rownames(data)))
#保存所有基因的差异结果
DIFFOUT=rbind(id=colnames(Diff),Diff)
write.table(DIFFOUT,file=paste0("2.","DIFF_all.xls"),sep="\t",quote=F,col.names=F)
diffSig=Diff[with(Diff, (abs(logFC)>1 & adj.P.Val < 0.05 )), ]
diffSigOut=rbind(id=colnames(diffSig),diffSig)
write.table(diffSigOut,file=paste0("2.","DIFF_less.xls"
标签:rt,KEGG,WGCNA,library,2025,ncol,rt1,data,###
From: https://blog.csdn.net/itwangyang520/article/details/143538184