1. 代謝組數(shù)據(jù)示例(test_meta.txt)
SampleID mws1401
A13 0.00
A16 0.00
A7 0.00
A17 0.00
A11 0.00
A3 0.00
A12 0.00
A20 0.00
A19 0.00
A15 5.98
A14 5.32
A9 0.00
A8 6.01
A5 6.04
2.基因表達(dá)譜數(shù)據(jù)示例(test_gene.txt)
GeneID A13 A16 A7 A17 A11 A3 A12 A20 A19 A15 A14 A9 A8 A5
Gene1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 5.98 5.32 0.00 6.01 6.04
Gene2 5.59 5.71 5.74 5.49 5.62 5.79 5.70 6.02 5.94 5.52 5.72 5.43 5.87 5.95
Gene3 5.26 5.96 6.49 5.58 5.97 6.58 6.10 6.72 6.51 6.52 6.37 5.87 6.50 6.92
Gene4 5.62 5.51 4.36 5.91 5.27 5.89 4.94 6.62 5.38 5.95 5.82 5.44 5.91 6.64
Gene5 4.35 4.35 4.85 4.02 4.49 5.02 4.50 4.87 4.71 5.13 5.28 5.20 4.88 5.27
腳本:
# 讀取數(shù)據(jù)
meta_file <- "test_meta.txt"
gene_exp <- "test_gene.txt"
output <- "test_result.txt"
# 讀取數(shù)據(jù)并進(jìn)行轉(zhuǎn)置
meta_data <- as.data.frame(read.table(file = meta_file, header = TRUE, row.names = 1, sep = "\t"))
gene_data <- as.data.frame(read.table(file = gene_exp, header = TRUE, row.names = 1))
gene_data <- t(gene_data)
# 獲取列名
meta.names <- colnames(meta_data)[1:length(colnames(meta_data))]
gene.names <- colnames(gene_data)[1:length(colnames(gene_data))]
# 初始化空數(shù)據(jù)框 out
out <- data.frame(matrix(ncol = 4, nrow = 0))
colnames(out) <- c("meta", "gene", "cor", "p")
# 數(shù)據(jù)標(biāo)準(zhǔn)化
meta_data_scaled <- as.data.frame(scale(meta_data))
gene_data_scaled <- as.data.frame(scale(gene_data))
# 循環(huán)計(jì)算相關(guān)性
for (gene in gene.names) {
? dd <- data.frame(gene1 = gene_data_scaled[, gene], meta_data_scaled)
? for (meta in meta.names) {
? ? form <- paste(meta, "~ gene1", sep = " ")
? ? form <- as.formula(form)
? ? lm.result <- lm(form, dd)
? ? cor <- as.numeric(summary(lm.result)$coefficients[, 'Estimate'][2])
? ? p <- as.numeric(summary(lm.result)$coefficients[, 'Pr(>|t|)'][2])
? ? tmp <- data.frame(meta = meta, gene = gene, cor = cor, p = p)
? ? out <- rbind(out, tmp)
? }
}
# 寫入結(jié)果
write.table(out, file = output, quote = FALSE, sep = "\t")
還可以添加群體結(jié)構(gòu)(PCA結(jié)果作為其他自變量/協(xié)變量)。下面腳本中沒做數(shù)據(jù)標(biāo)準(zhǔn)化處理,表頭可能也有問題。
meta_file <- "test_meta.txt"
gene_exp <- "test_gene.txt"
pca_cor = read.table("pca_sort.txt",header=T,sep=" ")
output <- "test_result.txt"
meta_data=as.data.frame(read.table(file=meta_file,header=T,row.names=1,sep="\t"))
gene_data=as.data.frame(read.table(file=gene_exp,header=T,row.names=1))
meta.names=colnames(meta_data)[1:length(colnames(meta_data))]
gene.names=colnames(gene_data)[1:length(colnames(gene_data))]
out=c("meta","Gene","cor","p")
for (gene in gene.names) {
? dd=data.frame(gene1 = gene_data[,gene], pc1 = pca_cor$PC1, pc2 = pca_cor$PC2, pc3 = pca_cor$PC3, meta_data[,1:length(colnames(meta_data))])
for(meta in meta.names) {
? form <- paste(meta,"~ gene1+pc1+pc2+pc3",sep=" ")
? form <- as.formula(form)
? print(form)
? lm.result <- lm(form,dd)
? cor=as.numeric(summary(lm.result)$coefficients[,'Estimate'][2])
? p=as.numeric(summary(lm.result)$coefficients[,'Pr(>|t|)'][2])
? tmp=cbind(meta,gene,cor,p)
? out=rbind(out,tmp)
}
}
write.table(out,file=output,quote=FALSE,sep="\t")