01
差異表達(dá)分析
數(shù)據(jù)下載Xena - TCGA數(shù)據(jù)下載 - 簡書 (jianshu.com)
#數(shù)據(jù)準(zhǔn)備
> expr = read.table("TCGA-GBM.htseq_counts.tsv",header = T, row.names = "Ensembl_ID")
> expr = expr**2.7
> colnames(expr) = gsub("[A..Z]$","",colnames(expr))
row.names(dat_estimation)=dat_estimation$ID
common_id = intersect(row.names(dat_estimation),colnames(expr))
select=dat_estimation[common_id, ]
>select = select[order(select$Stromal_group),]
> raw_data = expr[,select$ID]
library(limma)
library(dplyr)
> library(edgeR)
# 設(shè)置樣本分組
> group_list <- select$Stromal_group
> dge<-edgeR::DGEList(count=raw_data,
+ group = group_list)
design=data.frame(group=as.factor(group_list))
rownames(design)=colnames(raw_data)
#對數(shù)據(jù)進(jìn)行過濾
cData=model.matrix(~0+group,data=design)
colnames(cData)=c("high","low")
keep <- filterByExpr(dge, cData)
dge <- dge[keep,,keep.lib.sizes=FALSE]
#對數(shù)據(jù)進(jìn)行歸一化
#degsList
v <- voom(dge, cData, plot=TRUE)
#設(shè)計(jì)比較矩陣
cont.matri=makeContrasts(highvslow=high-low,levels = cData)
#擬合分析
fit <- lmFit(v, cData)
fit2 <- contrasts.fit(fit, cont.matri)
fit2 <- eBayes(fit2)
degs_results=topTable(fit2,adjust.method = "BH",number=Inf,sort.by = "P")
> head(degs_results)
logFC AveExpr t P.Value adj.P.Val
ENSG00000163513.16 0.10789269 6.801653 6.562867 1.574322e-09 5.465393e-06
ENSG00000204472.11 0.16016097 6.747415 6.506765 2.069340e-09 5.465393e-06
ENSG00000148175.11 0.11463904 6.944243 6.502291 2.114867e-09 5.465393e-06
ENSG00000153029.13 0.12084383 6.701478 6.483350 2.318792e-09 5.465393e-06
ENSG00000145685.12 0.08435916 6.838662 6.339794 4.641324e-09 8.751681e-06
ENSG00000021355.11 0.13465408 6.706506 6.076140 1.630360e-08 2.561840e-05
B
ENSG00000163513.16 11.422607
ENSG00000204472.11 11.200813
ENSG00000148175.11 11.093485
ENSG00000153029.13 11.102485
ENSG00000145685.12 10.355359
ENSG00000021355.11 9.232985
02
差異基因的富集分析
diff_gene =
篩選基因
> diff_gene <-subset(degs_results, adj.P.Val < 0.05 & abs(logFC) > 0.41)
篩選上調(diào)基因
up_diff_gene <-subset(degs_results, adj.P.Val < 0.05 & logFC > 0.41)
>up_diff_gene$gene_id = gsub("\\.[0-9]{1,}","",row.names(up_diff_gene))
對基因命名
up_diff_gene$symbol <- mapIds(org.Hs.eg.db,
keys=up_diff_gene$gene_id,
column="SYMBOL",
keytype="ENSEMBL",
multiVals="first")
BP層面上的富集分析:
> go_bp<-enrichGO(gene = up_diff_gene$symbol,OrgDb = org.Hs.eg.db,keyType = 'SYMBOL', ont = "BP", pAdjustMethod = "BH",pvalueCutoff = 0.05, qvalueCutoff = 0.05)
做成氣泡圖形式
dotplot(go_bp)
image.png