4.檢查數(shù)據(jù)質(zhì)量
# 清空環(huán)境
rm(list = objects( all = TRUE ))
if (!is.null( dev.list() ))
dev.off()
if (!dir.exists('./fig/')) {
dir.create("./fig/")
}
load(file = './data/final_AssayData.Rdata')#加載之前保存的結(jié)果
#畫(huà)PCA圖檢查數(shù)據(jù)
pca_data <- as.data.frame(t(AssayData))
library("FactoMineR")
library("factoextra")
dat.pca <- PCA(pca_data, graph = FALSE)
get_eig(dat.pca)
fviz_screeplot(dat.pca,
addlabels = TRUE,
ylim = c(0, 50))
## Contributions of variables to PC1
fviz_contrib(dat.pca, choice = "var", axes = 1, top = 10)
fviz_pca_ind( dat.pca,
geom.ind = "point",
col.ind = group_list,
palette = c("#00AFBB", "#FC4E07"),
addEllipses = TRUE,
legend.title = "Groups" )
library("ggfortify")
pca_data$group = group_list
autoplot( prcomp( pca_data[, 1:(ncol( pca_data ) - 1)] ),
data = pca_data,
colour = 'group') + theme_bw()
PCA圖
5.差異分析與可視化
library( "limma" ) #著名的limma包,做差異分析使用怕午,還有其他的,這一段不明白怎么寫(xiě)出來(lái)的琳钉,我也在看說(shuō)明書(shū)菠剩。
{
design <- model.matrix( ~0 + factor( group_list ) )
colnames( design ) = levels( factor( group_list ) )
rownames( design ) = colnames( AssayData )
}
design#檢查一下
contrast.matrix <- makeContrasts( "TNBC-control", levels = design ) #用乳腺癌的數(shù)據(jù)比上正常對(duì)照數(shù)據(jù),一般都是用癌癥:正常組織
contrast.matrix
{ fit <- lmFit( AssayData, design )
fit2 <- contrasts.fit( fit, contrast.matrix )
fit2 <- eBayes( fit2 )
nrDEG <- topTable( fit2, coef = 1, n = Inf ) save(nrDEG, file = './data/nrDEG.Rdata')
}
head(nrDEG)
# 差異分析到此結(jié)束鄙煤,取上調(diào)和下調(diào)前50的基因畫(huà)一個(gè)熱圖看看效果
library( "pheatmap" )
{
nrDEG_Z = nrDEG[ order( nrDEG$logFC ), ] nrDEG_F = nrDEG[ order( -nrDEG$logFC ), ]
choose_gene = c( rownames( nrDEG_Z )[1:50], rownames( nrDEG_F )[1:50] )
choose_matrix = AssayData[ choose_gene, ] choose_matrix = t( scale( t( choose_matrix ) ) ) choose_matrix[choose_matrix > 2] = 2 choose_matrix[choose_matrix < -2] = -2
annotation_col = data.frame( CellType = factor( group_list ) )
rownames( annotation_col ) = colnames( AssayData )
pheatmap( fontsize = 6,
choose_matrix,
annotation_col = annotation_col, show_rownames = T,
show_colnames = F,
annotation_legend = T,
cluster_cols = F,
filename = "./fig/heatmap_top100_logFC.png") }
直接貼圖晾匠,后續(xù)可以分享代碼討論~
熱圖
火山圖
氣泡圖
小tips:
1.setwd()很重要;
2.學(xué)會(huì)使用包很重要;
3.代碼邏輯很重要;
4.了解你的數(shù)據(jù)最重要~jimmy說(shuō)的梯刚,四腳贊同
從我學(xué)R的經(jīng)歷來(lái)看凉馆,先把基礎(chǔ)的知識(shí)掌握好,然后再談跑代碼亡资,這些就算放在這咱也看不懂是不澜共。我也是按照代碼一個(gè)一個(gè)查,一遍又一遍試才真正跑出來(lái)的結(jié)果锥腻,我也試過(guò)跑自己想做的方向嗦董,同樣問(wèn)題多多,但總歸也算有結(jié)果旷太!結(jié)果出來(lái)那一刻很開(kāi)心展懈,但學(xué)習(xí)永不止步,發(fā)現(xiàn)花花的公眾號(hào)更簡(jiǎn)單一下供璧,jimmy永遠(yuǎn)的神存崖,每每看公眾號(hào)文章,總能有所啟發(fā)(快點(diǎn)學(xué)R和linux睡毒,需要伙伴一起學(xué)習(xí)討論進(jìn)步~)
由于每天都是實(shí)驗(yàn)室搬磚来惧,我電腦和數(shù)據(jù)都沒(méi)帶回來(lái),純粹是靠筆記來(lái)copy和paste演顾,后面再貼剩下的代碼吧供搀,好累隅居,過(guò)七夕腳都要逛斷了:)