> rm(list = ls()) #清除所有變量
> load(file = "step2output.Rdata")#加載之前的數(shù)據(jù)
#輸入數(shù)據(jù):exp表達(dá)數(shù)據(jù)和group_list因子化分組
#Principal Component Analysis
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials
PCA降維——第一步制圈,將exp數(shù)據(jù)轉(zhuǎn)置们童,降維為甚需轉(zhuǎn)置呢?
> exp[1:4,1:4]#探針id和樣本名
GSM1052615 GSM1052616 GSM1052617 GSM1052618
7892501 7.24559 6.80686 7.73301 6.18961
7892502 6.82711 6.70157 7.02471 6.20493
7892503 4.39977 4.50781 4.88250 4.36295
7892504 9.48025 9.67952 9.63074 9.69200
> t(exp[1:4,1:4])#轉(zhuǎn)置
7892501 7892502 7892503 7892504
GSM1052615 7.24559 6.82711 4.39977 9.48025
GSM1052616 6.80686 6.70157 4.50781 9.67952
GSM1052617 7.73301 7.02471 4.88250 9.63074
GSM1052618 6.18961 6.20493 4.36295 9.69200
#View(t(exp))
第二步鲸鹦,PCA降維
> {
dat=as.data.frame(t(exp))#exp表達(dá)矩陣數(shù)據(jù)框
library(FactoMineR)#FactoMineR(負(fù)責(zé)分析標(biāo)準(zhǔn)化數(shù)據(jù))
library(factoextra)#factoextra(負(fù)責(zé)可視化)
dat.pca <- PCA(dat, graph = FALSE)
#dat從exp得來;graph邏輯值慧库,TRUE的話自動畫圖
#FactoMineR中的PCA()自動幫助dat進(jìn)行標(biāo)準(zhǔn)化
#下面用factoextra包里面的函數(shù)進(jìn)行可視化操作
fviz_pca_ind(dat.pca,#fviz_pca_ind對單個變量進(jìn)行畫圖
geom.ind = "point", # 只顯示點(diǎn)
col.ind = group_list, # color by groups用顏色指示值
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
ggsave('all_samples_PCA.png')#保存在工作目錄下
> }
all_samples_PCA.png
dim1和dim2分別代表什么?
-
熱圖——第一步馋嗜,數(shù)據(jù)準(zhǔn)備
數(shù)據(jù)exp.png
> cg=names(tail(sort(apply(exp,1,sd)),1000))#sd是標(biāo)準(zhǔn)差
#apply(X, MARGIN, FUN, ...)#X 陣列
#MARGIN 1表示矩陣行齐板,2表示矩陣列,也可以是c(1,2)
#對數(shù)組或者矩陣的一個維度使用函數(shù)生成值得列表或者數(shù)組葛菇、向量甘磨。
#sort()默認(rèn)升序
#view(cg)
#head(cg)
#head(exp)
> n=exp[cg,]#篩選行,exp中取1000子集
> head(n)
GSM1052615 GSM1052616 GSM1052617 GSM1052618 GSM1052619 GSM1052620
7893963 5.79288 5.27317 6.08883 4.52216 5.10400 4.48187
7895624 7.71782 8.01318 8.40404 7.21848 8.01405 6.59685
8109830 11.47330 11.30540 11.42210 10.37890 10.24600 10.05490
7953844 10.10830 9.97290 10.09840 8.98978 8.82521 8.81154
8156126 9.18541 8.87702 9.23965 7.97783 7.97500 7.84363
8152119 7.09276 6.98717 7.13838 8.26911 8.25215 8.26818
第二步,將分組數(shù)據(jù)轉(zhuǎn)換成數(shù)據(jù)框眯停,如下圖所示
> annotation_col=data.frame(group=group_list)#轉(zhuǎn)成數(shù)據(jù)框方便增加,列名
group
GSM1052615 control
GSM1052616 control
GSM1052617 control
GSM1052618 treat
GSM1052619 treat
GSM1052620 treat
分組數(shù)據(jù)框.png
第三步济舆,畫熱圖
> rownames(annotation_col)=colnames(n) #行名
[1] "GSM1052615" "GSM1052616" "GSM1052617" "GSM1052618" "GSM1052619" "GSM1052620"
#View(annotation_col)
> library(pheatmap)
> pheatmap(n,
show_colnames =F,#是否顯示列名
show_rownames = F,
annotation_col=annotation_col,#增加分組的欄
scale = "row")#橫的比較
> dev.off()
Rplot.png