使用R語言進行相關性分析熱圖的繪制

cor函數

Pearson、Spearman、Kendall相關系數都可以通過cor函數實現，cov協方差函數參數同cor函數。

（1）用法

cor(x,y=NULL,use="everything",method= c("pearson","kendall","spearman"))
cor(x, use='everything', method='pearson')  # correlation matrix of x
cor(mtcars$mpg, mtcars$cyl)  # correlation coefficient between two variables

x：矩陣或數據框。
use：指定缺失數據的處理方式。可選項：all.obs（假設不存在缺失數據）、everything（數據存在缺失值時，相關系數計算結果為NA）、complete.obs（行刪除）、pairwise.complete.obs（成對刪除）。
method：指定相關系數的類型。可選類型為pearson、spearman、kendall。

（2）R Script

> testdata1[1:5,1:5] #隨意找的數(shù)據(jù)
              CF1      CF2      CF3      CF4       CM1
rna13468 66.97984 80.07318 54.87525 91.65463  1.401584
rna885   26.53467 44.51450 33.65076 40.72113 60.633389
rna32332  0.00000 37.71825 11.89813  0.00000  1.403419
rna8744  42.44415 52.31791 60.35968 54.00533 39.524090
rna16488  0.00000  0.00000  0.00000  0.00000  0.000000
> cor_data <- cor(testdata1,method="pearson")
> round(cor_data[1:5,1:5],3)
       CF1    CF2    CF3    CF4    CM1
CF1  1.000  0.668  0.697  0.952 -0.129
CF2  0.668  1.000  0.923  0.664 -0.534
CF3  0.697  0.923  1.000  0.647 -0.386
CF4  0.952  0.664  0.647  1.000 -0.203
CM1 -0.129 -0.534 -0.386 -0.203  1.000
> cor_data <- cor(testdata1,method="spearman")
> round(cor_data[1:5,1:5],3)
       CF1    CF2    CF3    CF4    CM1
CF1  1.000  0.728  0.706  0.904 -0.055
CF2  0.728  1.000  0.785  0.734 -0.373
CF3  0.706  0.785  1.000  0.605 -0.212
CF4  0.904  0.734  0.605  1.000 -0.182
CM1 -0.055 -0.373 -0.212 -0.182  1.000

corrplot

（1）用法

corrplot(corr,  # correlation coefficient matrix
    method = c("circle", "square", "ellipse", "number", "shade", "color", "pie"), 
# visualization method: circle, square, ellipse, number, shade, color or pie
    type = c("full", "lower", "upper"), 
# which part to display: the full matrix, the lower triangle or the upper triangle
    add = FALSE, 
    col = NULL,  # colors used for the glyphs; an evenly spaced color scale by default
    bg = "white",  # background color
    title = "",  # title
    is.corr = TRUE,  # whether the input is a correlation matrix
    diag = TRUE,  # whether to show the values on the diagonal
    outline = FALSE,  # whether to draw the outline of circles, squares or ellipses
    mar = c(0,0,0,0),  # margins on the four sides of the plot
    addgrid.col = NULL, 
# grid line color; when method is color or shade the default is white, otherwise grey
    addCoef.col = NULL, 
# color of the correlation coefficients written on the plot; NULL (default) adds none
    addCoefasPercent = FALSE,  # whether to show the coefficients as percentages
    order = c("original", "AOE", "FPC", "hclust", "alphabet"), 
# ordering of the variables: original order, angular order of the eigenvectors (AOE),
# first principal component order (FPC), hierarchical clustering order (hclust) or
# alphabetical order; the author notes that AOE usually orders better than FPC
    hclust.method = c("complete", "ward", "single", "average", 
                      "mcquitty", "median", "centroid"), 
# when order is hclust, one of the seven hierarchical clustering methods listed above
    addrect = NULL,  # when order is hclust, adds rectangles to the correlation plot
    rect.col = "black",  # color of the rectangles
    rect.lwd = 2,  # line width of the rectangles
    tl.pos = NULL, 
# position of the text labels (variable names): with type=full the default is
# left and top (lt), with type=lower left and diagonal (ld), with type=upper
# top and diagonal; d means on the diagonal, n means no text labels
    tl.cex = 1,  # size of the text labels
    tl.col = "red",  # color of the text labels
    tl.offset = 0.4, tl.srt = 90, 
    cl.pos = NULL, 
# position of the color legend: on the right when type=upper or full, at the
# bottom when type=lower; set this to n when no legend is wanted
    cl.lim = NULL, 
    cl.length = NULL, cl.cex = 0.8, cl.ratio = 0.15, 
    cl.align.text = "c",cl.offset = 0.5, 
    addshade = c("negative", "positive", "all"), 
# only used when method=shade; negative/positive/all add shading to the negative,
# the positive or all correlation coefficients. Note: positive coefficients are
# shaded at 45 degrees, negative coefficients at 135 degrees
    shade.lwd = 1,  # line width of the shading
    shade.col = "white",  # color of the shading lines
    p.mat = NULL, sig.level = 0.05, 
    insig = c("pch","p-value","blank", "n"), 
    pch = 4, pch.col = "black", pch.cex = 3, 
    plotCI = c("n","square", "circle", "rect"), 
    lowCI.mat = NULL, uppCI.mat = NULL, ...)

（2）R Script

# Attach corrplot and draw the matrix with every option left at its default.
library("corrplot")
corrplot(corr = cor_data)

image

# Visualization method, one of:
# "circle", "square", "ellipse", "number", "shade", "color", "pie"
corrplot(
  cor_data,
  title = "method=pie",
  method = "pie"
)

image

# Which part of the matrix to display: "full", "lower" or "upper"
corrplot(
  cor_data,
  title = "type=upper",
  type = "upper"
)

image

# Mixed glyph styles: corrplot.mixed(matrix, lower = "number", upper = "circle")
# tl.col colors the labels on the diagonal, lower.col colors the lower
# triangle, number.cex sets the font size used in the lower triangle.
corrplot.mixed(cor_data, upper = "pie", lower = "ellipse")
corrplot.mixed(
  cor_data,
  upper = "pie",
  lower = "number",
  number.cex = 1,
  lower.col = "skyblue",
  tl.col = "green"
)

image

# Variable ordering: "original", "AOE", "FPC", "hclust" or "alphabet".
# Options that go with order = "hclust":
#   addrect = 4          the grouping rectangles
#   rect.col = "black"   color of the rectangles
#   rect.lwd = 2         line width of the rectangles
#   hclust.method        one of "complete", "ward", "single", "average",
#                        "mcquitty", "median", "centroid"
corrplot(
  cor_data,
  order = "hclust",
  addrect = 4,
  hclust.method = "average"
)
corrplot(cor_data, order = "AOE")

image

# Custom colors: build a blue-white-red palette function and sample 100 steps
# from it; background, grid and label colors are overridden as well.
col1 <- colorRampPalette(c("blue", "white", "red"))
corrplot(
  cor_data,
  col = col1(100),
  order = "hclust",
  addrect = 4,
  bg = "khaki1",
  addgrid.col = "green",
  tl.cex = 0.7,
  tl.col = "purple"
)

image

# Write the coefficients on top of a color-tile plot, shown as percentages.
# Uses the `col1` palette function created earlier in the script.
corrplot(
  cor_data,
  method = "color",
  order = "hclust",
  addrect = 4,
  col = col1(100),
  tl.col = "black",
  addCoef.col = "grey",
  addCoefasPercent = TRUE # spelled out: `T` is an ordinary, reassignable variable
)

image

ggcorrplot

ggcorrplot包內只有2個函數，一個cor_pmat()用于計算p值，一個ggcorrplot()用于繪圖。ggcorrplot相當于精簡版的corrplot包，只有主題更加豐富多樣。

（1）用法

ggcorrplot(corr, method = c("square", "circle"), type = c("full", "lower", "upper"), 
  ggtheme = ggplot2::theme_minimal, title = "",
  show.legend = TRUE, legend.title = "Corr", 
  show.diag = FALSE, 
  colors = c("blue", "white", "red"), outline.color = "gray",
  hc.order = FALSE, hc.method = "complete", 
  lab = FALSE, lab_col = "black", lab_size = 4, p.mat = NULL, sig.level = 0.05,
  insig = c("pch", "blank"), pch = 4, pch.col = "black", pch.cex = 5,
  tl.cex = 12, tl.col = "black", tl.srt = 45, digits = 2 )

（2）R Script

library(ggcorrplot)
# P-values of the pairwise correlation tests.
# cor_pmat() runs cor.test() on every pair of columns, so it must be given the
# raw data: passing the correlation matrix would test correlations between
# columns of coefficients instead. `cor_data` was last computed with
# method = "spearman", so the same method is used here. exact = FALSE asks for
# the asymptotic p-value; the data contain ties (repeated zeros), for which an
# exact p-value cannot be computed.
cor_p <- cor_pmat(testdata1, method = "spearman", exact = FALSE)
round(cor_p[1:5, 1:5], 3)
# Default plot (squares)
ggcorrplot(cor_data)
# Visualization method
ggcorrplot(cor_data, method = "circle")
# Order the variables by hierarchical clustering
ggcorrplot(cor_data, hc.order = TRUE, outline.color = "white")

image

# Show only the lower triangle
ggcorrplot(
  cor_data,
  type = "lower",
  hc.order = TRUE,
  outline.color = "white"
)
# Change the low / mid / high colors
ggcorrplot(
  cor_data,
  type = "lower",
  hc.order = TRUE,
  outline.color = "white",
  colors = c("#6D9EC1", "white", "#E46726")
)
# Change the ggplot2 theme
ggcorrplot(
  cor_data,
  type = "lower",
  hc.order = TRUE,
  outline.color = "white",
  colors = c("#6D9EC1", "white", "#E46726"),
  ggtheme = ggplot2::theme_void()
)

image

# Print the correlation coefficients on the tiles
ggcorrplot(
  cor_data,
  type = "lower",
  hc.order = TRUE,
  outline.color = "white",
  colors = c("#6D9EC1", "white", "#E46726"),
  lab = TRUE
)
# Mark the non-significant coefficients with a cross, using the p-value
# matrix `cor_p` computed earlier in the script
ggcorrplot(
  cor_data,
  type = "lower",
  hc.order = TRUE,
  outline.color = "white",
  colors = c("#6D9EC1", "white", "#E46726"),
  p.mat = cor_p
)

image

ggcorr

（1）R Script

# ggcorr() comes from GGally; guides(), guide_colorbar(), theme() and
# element_text() come from ggplot2. Neither package is attached anywhere
# else in the script, so attach them before the first ggcorr() call.
library(GGally)
library(ggplot2)
## Correlation plot computed directly from the raw data
ggcorr(testdata1, method = c("pairwise", "spearman"))
## Number of breaks in the color scale
ggcorr(testdata1, method = c("pairwise", "spearman"), nbreaks = 5)
## Legend settings
ggcorr(
  testdata1,
  method = c("pairwise", "spearman"),
  name = "12345",
  legend.position = "bottom",
  legend.size = 12
) +
  guides(fill = guide_colorbar(barwidth = 18, title.vjust = 0.75)) +
  theme(legend.title = element_text(size = 14))

image

## Colors of the low / mid / high ends of the scale
ggcorr(
  testdata1,
  method = c("pairwise", "spearman"),
  low = "steelblue",
  mid = "white",
  high = "darkred"
)
## Draw circles
ggcorr(
  testdata1,
  method = c("pairwise", "spearman"),
  geom = "circle",
  min_size = 2,
  max_size = 6
)
## Print the correlation coefficients
ggcorr(
  testdata1,
  method = c("pairwise", "spearman"),
  label = TRUE,
  label_size = 3,
  label_color = "white"
)

image

## Control the variable labels
ggcorr(
  testdata1,
  method = c("pairwise", "spearman"),
  hjust = 0.75,
  size = 5,
  color = "grey50",
  layout.exp = 1
)
## Show only the larger correlation coefficients
# Points are colored by the sign of the coefficient and are fully transparent
# unless its absolute value exceeds 0.5.
ggcorr(
  testdata1,
  method = c("pairwise", "spearman"),
  label = TRUE,
  hjust = 0.75,
  geom = "blank"
) +
  geom_point(
    size = 10,
    aes(color = coefficient > 0, alpha = abs(coefficient) > 0.5)
  ) +
  scale_alpha_manual(values = c("TRUE" = 0.25, "FALSE" = 0)) +
  # "none" replaces the deprecated FALSE for switching a guide off
  guides(color = "none", alpha = "none")

image

樣品間相似性（similarity）和距離（distance）

（1）表示距離的方法

歐式距離（Euclidean Distance）
√((a1-a2)^2+(b1-b2)^2+(c1-c2)^2)
dist(t(x),p=2)
曼哈頓距離（Manhattan Distance）
|a1-a2|+|b1-b2|+|c1-c2|
dist(t(x),"manhattan")
切比雪夫距離（Chebyshev Distance）
max(|a1-a2|,|b1-b2|,|c1-c2|)
dist(t(x),"maximum")
閔可夫斯基距離（Minkowski Distance）
dist(t(x),"minkowski")
標準化歐氏距離（Standardized Euclidean distance）
先將數據各維分量標準化到均值、方差相等，即(x-μ)/σ，標準化后的值=(標準化前的值-分量的均值)/分量的標準差。
x1 = scale(t(x), center=T,scale=T)
dist(x1)
馬氏距離（Mahalanobis Distance）
蘭式距離
dist(t(x), method = "canberra")
夾角余弦（Cosine）
漢明距離（Hamming distance）
兩個等長字符串s1與s2之間的漢明距離定義為將其中一個變為另外一個所需要作的最小替換次數。
x <- c(1, 0, 0)
y <- c(1, 0, 1)
hamming.distance(x, y) #1
杰卡德相似系數(shù)（Jaccard similarity coefficient）
dist(t(x), method = "Jaccard")
相關系數(shù)（Correlation coefficient）與相關距離（Correlation distance）
1-cor(x)
信息熵（Information Entropy）
信息熵是衡量分布的混亂程度或分散程度的一種度量。分布越分散（分布越平均），信息熵就越大。分布越有序（分布越集中），信息熵就越小。
kl散度

（2）dist用法

This function computes and returns the distance matrix computed by using the specified distance measure to compute the distances between the rows of a data matrix.
這個函數用特定的方法計算矩陣的行之間的距離，并返回距離矩陣。

dist(x, method = "euclidean", diag = FALSE, upper = FALSE, p = 2)

method：可以是"euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"
diag：是否顯示對角線的值
upper：是否顯示上三角的值
p：The power of the Minkowski distance

（3）scale用法

scale(x, center = TRUE, scale = TRUE)

scale是對矩陣的每一列進行標準化，如果要對行標準化需要先轉置。如heatmapdata <- t(scale(t(heatmapdata)))

（4）R Script

# pheatmap() and brewer.pal() live in packages that the script never attaches;
# load them here so the snippet runs on its own.
library(pheatmap)
library(RColorBrewer)

# dist() measures distances between rows, so transpose to compare the columns
# of testdata1 with each other (default method: Euclidean).
sampleDist <- dist(t(testdata1))
sampleDistMatrix <- as.matrix(sampleDist)
# Drop the column names of the matrix; the row names are kept.
colnames(sampleDistMatrix) <- NULL
# 255-step palette interpolated from the reversed 9-class "Blues" scale.
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
# Cluster rows and columns on the same precomputed distances.
pheatmap(
  sampleDistMatrix,
  clustering_distance_rows = sampleDist,
  clustering_distance_cols = sampleDist,
  color = colors
)

image

作者：bio_meow
鏈接：http://www.reibang.com/p/aeb9f612e888
來源：簡書
著作權歸作者所有。商業轉載請聯系作者獲得授權，非商業轉載請注明出處。

?著作權歸作者所有,轉載或內容合作請聯(lián)系作者

人面猴
序言：七十年代末崔梗，一起剝皮案震驚了整個濱河市夜只，隨后出現(xiàn)的幾起案子，更是在濱河造成了極大的恐慌蒜魄，老刑警劉巖扔亥，帶你破解...
沈念sama閱讀 222,590評論 6贊 517
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件，死亡現(xiàn)場離奇詭異谈为，居然都是意外死亡旅挤，警方通過查閱死者的電腦和手機，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 95,157評論 3贊 399
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門伞鲫，熙熙樓的掌柜王于貴愁眉苦臉地迎上來粘茄，“玉大人，你說我怎么就攤上這事∑獍辏” “怎么了儒搭？”我有些...
開封第一講書人閱讀 169,301評論 0贊 362
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵，是天一觀的道長芙贫。經常有香客問我搂鲫，道長，這世上最難降的妖魔是什么磺平？我笑而不...
開封第一講書人閱讀 60,078評論 1贊 300
?港島之戀（遺憾婚禮）
正文為了忘掉前任魂仍，我火速辦了婚禮，結果婚禮上拣挪，老公的妹妹穿的比我還像新娘擦酌。我一直安慰自己，他們只是感情好媒吗，可當我...
茶點故事閱讀 69,082評論 6贊 398
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布仑氛。她就那樣靜靜地躺著，像睡著了一般闸英。火紅的嫁衣襯著肌膚如雪锯岖。梳的紋絲不亂的頭發(fā)上，一...
開封第一講書人閱讀 52,682評論 1贊 312
城市分裂傳說
那天甫何，我揣著相機與錄音出吹，去河邊找鬼。笑死辙喂，一個胖子當著我的面吹牛捶牢，可吹牛的內容都是我干的。我是一名探鬼主播巍耗，決...
沈念sama閱讀 41,155評論 3贊 422
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼秋麸，長吁一口氣：“原來是場噩夢啊……” “哼！你這毒婦竟也來了炬太？” 一聲冷哼從身側響起灸蟆，我...
開封第一講書人閱讀 40,098評論 0贊 277
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤，失蹤者是張志新（化名）和其女友劉穎亲族，沒想到半個月后炒考，有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體，經...
沈念sama閱讀 46,638評論 1贊 319
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡霎迫，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內容為張勛視角年9月15日...
茶點故事閱讀 38,701評論 3贊 342
?白月光啟示錄
正文我和宋清朗相戀三年斋枢，在試婚紗的時候發(fā)現(xiàn)自己被綠了。大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片知给。...
茶點故事閱讀 40,852評論 1贊 353
活死人
序言：一個原本活蹦亂跳的男人離奇死亡瓤帚，死狀恐怖，靈堂內的尸體忽然破棺而出，到底是詐尸還是另有隱情缘滥，我是刑警寧澤轰胁，帶...
沈念sama閱讀 36,520評論 5贊 351
?日本核電站爆炸內幕
正文年R本政府宣布，位于F島的核電站朝扼，受9級特大地震影響赃阀，放射性物質發(fā)生泄漏。R本人自食惡果不足惜擎颖，卻給世界環(huán)境...
茶點故事閱讀 42,181評論 3贊 335
男人毒藥：我在死后第九天來索命
文/蒙蒙一榛斯、第九天我趴在偏房一處隱蔽的房頂上張望。院中可真熱鬧搂捧，春花似錦驮俗、人聲如沸。這莊子的主人今日做“春日...
開封第一講書人閱讀 32,674評論 0贊 25
一樁弒父案王凑，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽。三九已至聋丝，卻和暖如春索烹，著一層夾襖步出監(jiān)牢的瞬間，已是汗流浹背弱睦。一陣腳步聲響...
開封第一講書人閱讀 33,788評論 1贊 274
情欲美人皮
我被黑心中介騙來泰國打工百姓，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留，地道東北人况木。一個月前我還...
沈念sama閱讀 49,279評論 3贊 379
代替公主和親
正文我出身青樓垒拢，卻偏偏與公主長得像，于是被迫代替她去往敵國和親火惊。傳聞我的和親對象是個殘疾皇子求类，可洞房花燭夜當晚...
茶點故事閱讀 45,851評論 2贊 361

使用R語言進行相關性分析熱圖的繪制

使用R語言進行相關性分析熱圖的繪制

相關性分析

相關性指標

（1）Pearson相關系數（皮爾遜積差相關系數）

（2）Spearman等級相關系數（斯皮爾曼秩相關系數）

（3）Kendall's Tau相關系數

（4）其它

cor函數

（1）用法

（2）R Script

corrplot

（1）用法

（2）R Script

ggcorrplot

（1）用法

（2）R Script

ggcorr

（1）R Script

樣品間相似性（similarity）和距離（distance）

（1）表示距離的方法

（2）dist用法

（3）scale用法

（4）R Script

推薦閱讀更多精彩內容