# NOTE(review): clearing the global environment is kept for backward
# compatibility, but it destroys whatever the calling session holds; prefer
# running this script in a fresh R process instead of relying on it.
rm(list = ls())
# Global option: keep strings as character (spelled FALSE, since `F` is an
# ordinary variable that can be reassigned).
options(stringsAsFactors = FALSE)
library(foreign)  # package for loading external data
library(stringr)  # package for string handling
library(dplyr)    # package for data cleaning
# Load the matrix data and grouping data produced by step 1 (per the original
# author's note). load() returns the names of the objects it restored.
lname <- load(file = "group.Rdata")
# Show the names of the restored objects; an explicit print() also works when
# the script is run through source().
print(lname)
# Flag the rows of a count table that are expressed in enough columns.
#
# A row is kept when the number of columns with a count > 0 is at least
# floor(min_fraction * ncol(counts)).
#
#   counts        count matrix or data frame (presumably rows are genes and
#                 columns are samples -- confirm against step 1).
#   min_fraction  fraction of columns that must be non-zero; defaults to the
#                 0.75 used throughout this script.
#
# Returns a logical vector with one element per row of `counts`.
is_expressed_row <- function(counts, min_fraction = 0.75) {
  rowSums(counts > 0) >= floor(min_fraction * ncol(counts))
}

# `drop = FALSE` keeps the two-dimensional shape even when only one row (or
# one column) remains, so the later rownames()/colnames() calls still work.

# Liver: logical keep-vector and filtered count table.
keep_agc <- is_expressed_row(liver.count)
filter.liver.count <- liver.count[keep_agc, , drop = FALSE]

# Colon: logical keep-vector and filtered count table.
keep_acc <- is_expressed_row(colon.count)
filter.colon.count <- colon.count[keep_acc, , drop = FALSE]

# Blood sample (wbc): logical keep-vector and filtered count table.
keep_wbc <- is_expressed_row(wbc.count)
filter.wbc.count <- wbc.count[keep_wbc, , drop = FALSE]
# Normalization of the filtered data starts here.
library(preprocessCore)  # package needed for normalize.quantiles()

# log2-transform the counts (with a +1 offset so zeros stay finite) and
# quantile-normalize the result.
#
#   counts  filtered count matrix or data frame; it is coerced with
#           as.matrix() before the transform.
#
# Returns the normalized matrix, carrying the row and column names of
# `counts` (they are re-attached explicitly after normalization).
log2_quantile_normalize <- function(counts) {
  normalized <- normalize.quantiles(log2(as.matrix(counts) + 1))
  colnames(normalized) <- colnames(counts)
  rownames(normalized) <- rownames(counts)
  normalized
}

normal.log2.colon <- log2_quantile_normalize(filter.colon.count)
normal.log2.liver <- log2_quantile_normalize(filter.liver.count)
normal.log2.wbc <- log2_quantile_normalize(filter.wbc.count)
library(edgeR)  # package needed for the CPM conversion, cpm()

# Convert counts to CPM, take log2(CPM + 1), then quantile-normalize.
#
#   counts  filtered count table passed straight to cpm().
#
# Returns the normalized matrix labelled with the row and column names of
# `counts`.
cpm_quantile_normalize <- function(counts) {
  log_cpm <- log2(cpm(counts) + 1)
  normalized <- normalize.quantiles(log_cpm)
  colnames(normalized) <- colnames(counts)
  rownames(normalized) <- rownames(counts)
  normalized
}

cpm_normal_liver <- cpm_quantile_normalize(filter.liver.count)
cpm_normal_colon <- cpm_quantile_normalize(filter.colon.count)
cpm_normal_wbc <- cpm_quantile_normalize(filter.wbc.count)
# Save the normalized data together with the original (filtered) data.
# The *.list objects are not created in this part of the script; presumably
# they come from the loaded step-1 data -- confirm.
objects_to_save <- c(
  "liver.list", "colon.list", "wbc.list",
  "filter.colon.count", "filter.liver.count", "filter.wbc.count",
  "cpm_normal_colon", "cpm_normal_liver", "cpm_normal_wbc",
  "normal.log2.liver", "normal.log2.colon", "normal.log2.wbc"
)
save(list = objects_to_save, file = "normal_data.Rdata")