1、數據處理
數據:https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE141295
解壓所有的文件到input文件夾
# Merge the per-sample tables found in ./input into a single data frame.
# Every file is read without a header, so read.table() names the columns
# V1, V2, ...; V1 is the key the tables are joined on.
a <- list.files("input") # file names, e.g. "GSM4200430_IgAN1.txt"
dir <- paste0("./input/", a) # relative paths, e.g. "./input/GSM4200448_CTRL7.txt"
n <- length(dir) # 24 for this data set
if (n == 0) {
  stop("no files found in ./input", call. = FALSE)
}
# Read the first file up front so the loop only has to merge onto it.
merge.data <- read.table(file = dir[1], header = FALSE, dec = ".")
# seq_len(n)[-1] is empty when there is only one file, whereas 2:n would
# count down (2, 1) and try to read the non-existent dir[2].
for (i in seq_len(n)[-1]) {
  new.data <- read.table(file = dir[i], header = FALSE, dec = ".")
  merge.data <- merge(merge.data, new.data, by = "V1")
}
# Sample labels are the first 16 characters of each file name.
# NOTE(review): a longer sample name would be truncated here — confirm that
# all file names have a 16-character sample prefix.
b <- substr(a, 1, 16)
c <- c("geneid", b)
colnames(merge.data) <- c
# Drop the first five rows of the merged table.
# NOTE(review): presumably these are non-gene summary rows — confirm.
merge.data <- merge.data[-c(1:5), ]
write.csv(merge.data, file = "merge.data")
exprSet <- merge.data
input:比對過了,導進來的編號都對得上
image.png
2、DESeq2差異化分析
2-1,先整理一下exprSet
library(stringr)
# Start again from the merged table and turn it into a matrix-like data
# frame: gene IDs become the row names, the remaining columns are samples.
exprSet <- merge.data
# Keep only the first 15 characters of every gene ID.
exprSet$geneid <- substr(exprSet$geneid, start = 1, stop = 15)
# After trimming, several rows can share an ID; keep the first occurrence.
exprSet <- exprSet[!duplicated(exprSet$geneid), ]
rownames(exprSet) <- exprSet$geneid
# The ID now lives in the row names, so drop the first (geneid) column.
exprSet <- exprSet[, -1]
input:
image.png
2-2 DESeq2差異化分析
# Sample table: a sample sheet prepared outside this script. Its X column
# supplies the row names.
colData <- read.csv("pdata-腎病.csv", header = TRUE)
rownames(colData) <- colData$X
# One-column data frame (second column of the sample sheet), kept for later
# use as a plot annotation.
coldata2 <- colData[2]

# DESeq2 differential expression analysis, with `condition` as the only
# design term.
library(DESeq2)
dds <- DESeq(
  DESeqDataSetFromMatrix(
    countData = exprSet,
    colData = colData,
    design = ~ condition
  )
)
# Contrast on `condition` between the IgAN and CTRL levels.
# NOTE(review): presumably IgAN relative to CTRL — confirm against the
# DESeq2 documentation for `contrast`.
res <- results(dds, contrast = c("condition", "IgAN", "CTRL"))
# Plain data frame of the results, ordered by raw p-value.
DEG <- as.data.frame(res)
DEG <- DEG[order(DEG$pvalue), ]
# NOTE(review): the output file name below looks mis-encoded — confirm the
# intended name before relying on it.
write.csv(DEG, file = "腎癌DEseq差異分析結(jié)果.csv")
input:
image.png
image.png
3、可視化-火山圖
library(ggplot2)
# Volcano plot of the DESeq2 results held in DEG.

# Classify every gene. Everything starts as "no diff" (this also covers
# rows whose padj is NA); genes passing both thresholds are then relabelled.
# which() drops NA comparisons, so NA rows keep the default label.
DEG$sig <- "no diff"
DEG[which(DEG$log2FoldChange >= 1 & DEG$padj < 0.05), "sig"] <-
  "rich (p.adj < 0.05, log2FC >= 1)"
DEG[which(DEG$log2FoldChange <= -1 & DEG$padj < 0.05), "sig"] <-
  "down (p.adj < 0.05, log2FC <= -1)"

# Colours are keyed by label so the mapping stays correct even when one of
# the categories is absent from the data. An unnamed vector is matched by
# position and would shift in that case.
sig_colors <- c(
  "down (p.adj < 0.05, log2FC <= -1)" = "blue2",
  "no diff" = "gray30",
  "rich (p.adj < 0.05, log2FC >= 1)" = "red2"
)

# Draw the plot: log2 fold change on x, -log10 of the adjusted p-value on y.
volcano_p2 <- ggplot(DEG, aes(log2FoldChange, -log(padj, 10))) +
  geom_point(aes(color = sig), alpha = 0.6, size = 1) +
  scale_color_manual(values = sig_colors) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(color = "black", fill = "transparent"),
    legend.position = c(0.26, 0.92)
  ) +
  theme(
    legend.title = element_blank(),
    legend.key = element_rect(fill = "transparent"),
    legend.background = element_rect(fill = "transparent")
  ) +
  # Guide lines at the fold-change and significance thresholds used above.
  geom_vline(xintercept = c(-1, 1), color = "gray", size = 0.25) +
  geom_hline(yintercept = -log(0.05, 10), color = "gray", size = 0.25) +
  # The y axis shows padj, so label it as the adjusted p-value.
  labs(x = "log2 Fold Change", y = "-log10 adjusted p-value", color = NA) +
  xlim(-5, 5)

# Save the plot object built above. The earlier code passed `volcano_p`,
# a name that is never defined here.
ggsave("IgAN-volcano_p.png", volcano_p2, width = 5, height = 6)
input
image.png
4、可視化 MA-plot
library(BiocGenerics)
# MA plot of the DESeq2 results object, with the y axis limited to [-5, 5].
plotMA(
  res,
  ylim = c(-5, 5)
)
input:差異基因,標記為藍色
Image.png
5、熱圖(差異基因表達量熱圖)
# Heatmap of the expression values of the top differentially expressed genes.
library(pheatmap)

# Remove rows containing NA. na.fail() is deliberately not called first:
# it raises an error as soon as any NA is present, which would stop the
# script before na.omit() could run.
DEG <- na.omit(DEG)

# Keep genes with adjusted p-value < 0.05 and |log2 fold change| > 1.
diff_gene <- subset(DEG, padj < 0.05 & abs(log2FoldChange) > 1)
# Order by adjusted p-value and take at most the first 50 gene IDs.
diff_gene_sort <- diff_gene[order(diff_gene$padj), ]
choose_gene <- head(rownames(diff_gene_sort), 50)
if (length(choose_gene) < 2) {
  stop("fewer than two genes pass the thresholds; nothing to cluster",
       call. = FALSE)
}

# drop = FALSE keeps a two-dimensional object whatever the selection size.
choose_matrix <- exprSet[choose_gene, , drop = FALSE]
# scale() standardises columns, so transpose to z-score each gene (row)
# across samples, then transpose back to genes x samples.
choose_matrix_scale <- t(scale(t(as.matrix(choose_matrix))))
pheatmap(choose_matrix_scale,
         show_rownames = FALSE, show_colnames = FALSE,
         annotation_col = coldata2)
input:
image.png