使用seaborn包繪制熱圖
# library
import seaborn as sns
import pandas as pd
import numpy as np
# Build a 5x5 DataFrame of random floats with columns labelled a-e.
df = pd.DataFrame(
    data=np.random.random(size=(5, 5)),
    columns=list("abcde"),
)
# Preview the first rows (shown as cell output when run in a notebook).
df.head()
| | a | b | c | d | e |
---|---|---|---|---|---|
0 | 0.285442 | 0.951543 | 0.685812 | 0.924632 | 0.309812 |
1 | 0.358051 | 0.686573 | 0.286615 | 0.571409 | 0.224154 |
2 | 0.404226 | 0.489562 | 0.848711 | 0.490436 | 0.777601 |
3 | 0.244537 | 0.015112 | 0.253332 | 0.405353 | 0.482515 |
4 | 0.648074 | 0.593299 | 0.788003 | 0.731065 | 0.197049 |
# Render the square matrix as a heatmap, leaving every option at its default.
sns.heatmap(data=df)
image.png
# Build 100 rows of random floats for five variables labelled a-e.
df = pd.DataFrame(
    data=np.random.random(size=(100, 5)),
    columns=list("abcde"),
)
# Compute the correlation matrix: one coefficient per pair of columns.
corr_matrix = df.corr()
corr_matrix.head()
| | a | b | c | d | e |
---|---|---|---|---|---|
a | 1.000000 | 0.159442 | 0.124977 | 0.006820 | -0.164380 |
b | 0.159442 | 1.000000 | 0.204697 | 0.128948 | -0.152218 |
c | 0.124977 | 0.204697 | 1.000000 | 0.013078 | -0.210332 |
d | 0.006820 | 0.128948 | 0.013078 | 1.000000 | -0.066149 |
e | -0.164380 | -0.152218 | -0.210332 | -0.066149 | 1.000000 |
# Plot the correlation matrix; the cmap argument changes the heatmap colours.
sns.heatmap(
    data=corr_matrix,
    cmap="PuOr",
)
image.png
# Build a 10x10 DataFrame of random floats with columns labelled a-j.
df = pd.DataFrame(
    data=np.random.random(size=(10, 10)),
    columns=list("abcdefghij"),
)
# Heatmap with text annotations: annot=True adds the annotation text to the
# cells and annot_kws passes the text size.
sns.heatmap(
    data=df,
    annot=True,
    annot_kws=dict(size=7),
)
image.png
# Heatmap with custom grid lines: linewidths and linecolor change the width
# and colour of the lines drawn around the cells.
sns.heatmap(
    data=df,
    linecolor="yellow",
    linewidths=2,
)
image.png
# Heatmap without y-axis labels: yticklabels=False removes them.
sns.heatmap(
    data=df,
    yticklabels=False,
)
image.png
# Heatmap without the colour bar (legend): cbar=False removes it.
sns.heatmap(
    data=df,
    cbar=False,
)
image.png
# Restrict the colour bar range to 0 - 0.5 through the vmin and vmax arguments.
sns.heatmap(
    data=df,
    vmin=0,
    vmax=0.5,
    cmap="YlGnBu",
)
image.png
# Normalize the data by row: subtract each row's mean and divide by that
# row's standard deviation.
# The row statistics are computed in one vectorized pass and aligned on the
# index (axis=0), mirroring the vectorized column-wise normalization used
# elsewhere in this file instead of calling a Python lambda once per row.
df_norm_row = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0)
df_norm_row.head()
| | a | b | c | d | e | f | g | h | i | j |
---|---|---|---|---|---|---|---|---|---|---|
0 | -0.269670 | 0.382143 | -1.460830 | 1.402933 | -0.833766 | -0.245428 | -1.278216 | 1.171598 | 0.937957 | 0.193277 |
1 | 0.474720 | 0.890045 | -0.607959 | 0.143930 | -1.703700 | -0.907119 | 0.459649 | 1.476858 | 0.737861 | -0.964285 |
2 | -0.848842 | 1.051811 | -0.548000 | 0.835517 | 1.096437 | -0.535326 | -0.951875 | -0.831628 | 1.553493 | -0.821587 |
3 | 0.095071 | -1.127515 | -0.090492 | 0.081681 | -0.071626 | -1.829757 | -0.412118 | 1.650594 | 0.903475 | 0.800687 |
4 | 1.600482 | -0.628712 | -0.322168 | -0.625308 | 0.041427 | 1.357510 | -0.904758 | -1.389798 | 0.971431 | -0.100107 |
# Plot the row-normalized data.
sns.heatmap(
    data=df_norm_row,
    cmap="viridis",
)
image.png
# Normalize the data by column instead: subtract each column's mean and
# divide by that column's standard deviation, then plot the result.
df_norm_col = df.sub(df.mean()).div(df.std())
sns.heatmap(
    data=df_norm_col,
    cmap="viridis",
)
image.png
對熱圖添加聚類樹
# Libraries
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
# Load mtcars.csv, using the first CSV column as the row index.
# NOTE(review): despite its name, `url` holds a machine-specific local path;
# point it at your own copy of the file before running.
url = "c:/Users/Dell/Downloads/mtcars.csv"
df = pd.read_csv(filepath_or_buffer=url, index_col=0)
# Preview the first rows.
df.head()
| | mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb |
---|---|---|---|---|---|---|---|---|---|---|---|
Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
# Clustered heatmap with every option left at its default.
sns.clustermap(data=df)
# Display the figure.
plt.show()
image.png
# Rescale the data before plotting: standardize or normalize every column
# in the figure.
# Variant 1 - standardize, via standard_scale=1.
sns.clustermap(
    data=df,
    standard_scale=1,
)
plt.show()
image.png
# Variant 2 - normalize, via z_score=1.
sns.clustermap(
    data=df,
    z_score=1,
)
plt.show()
image.png
# Try different distance metrics.
# Clustered heatmap using the correlation distance.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="correlation",
    cmap="PiYG",
)
plt.show()
image.png
# Clustered heatmap using the euclidean distance.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="euclidean",
    cmap="PiYG",
)
plt.show()
image.png
# Try different clustering (linkage) methods.
# Linkage method used for calculating the clusters: single.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="euclidean",
    method="single",
    cmap="Blues",
)
plt.show()
image.png
In [38]:
# Linkage method used for calculating the clusters: ward.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="euclidean",
    method="ward",
    cmap="Blues",
)
plt.show()
image.png
# Change the heatmap colours.
# Same ward/euclidean clustering, drawn with the "mako" colour palette.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="euclidean",
    method="ward",
    cmap="mako",
)
plt.show()
image.png
# Same ward/euclidean clustering, drawn with the "viridis" colour palette.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="euclidean",
    method="ward",
    cmap="viridis",
)
plt.show()
image.png
# Add row annotation information.
# Pair each distinct value of the 'cyl' column with a colour, then map every
# row's 'cyl' value to its colour to get one colour per row.
my_palette = {
    cyl_value: color
    for cyl_value, color in zip(df["cyl"].unique(), ("orange", "yellow", "brown"))
}
row_colors = df["cyl"].map(my_palette)
row_colors
Mazda RX4 orange
Mazda RX4 Wag orange
Datsun 710 yellow
Hornet 4 Drive orange
Hornet Sportabout brown
Valiant orange
Duster 360 brown
Merc 240D yellow
Merc 230 yellow
Merc 280 orange
Merc 280C orange
Merc 450SE brown
Merc 450SL brown
Merc 450SLC brown
Cadillac Fleetwood brown
Lincoln Continental brown
Chrysler Imperial brown
Fiat 128 yellow
Honda Civic yellow
Toyota Corolla yellow
Toyota Corona yellow
Dodge Challenger brown
AMC Javelin brown
Camaro Z28 brown
Pontiac Firebird brown
Fiat X1-9 yellow
Porsche 914-2 yellow
Lotus Europa yellow
Ford Pantera L brown
Ferrari Dino orange
Maserati Bora brown
Volvo 142E yellow
Name: cyl, dtype: object
# Clustered heatmap with the per-row colours passed in through row_colors.
sns.clustermap(
    data=df,
    standard_scale=1,
    metric="correlation",
    method="single",
    cmap="Blues",
    row_colors=row_colors,
)
plt.show()
image.png