數據可視化_seaborn分布數據可視化

seaborn整體風格設置

sns.set() → 整體設置seaborn的主題、調色板、顏色代碼等多個樣式

# Show the value of every expression statement in a cell, not only the last one

from IPython.core.interactiveshell import InteractiveShell 
InteractiveShell.ast_node_interactivity = 'all' # IPython's default is 'last_expr'

# 導入相關庫
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns   # 導入seaborn庫
import os
import warnings
%matplotlib inline

warnings.filterwarnings('ignore')

# Sample data: one random "GDP" value per decade from 1950 to 2010
years = list(range(1950, 2011, 10))
gdp = 1000 * np.random.rand(len(years))

data = pd.DataFrame(gdp, index=years)

# Configure seaborn's overall theme, palette and colour codes in one call:
#   style       -> theme, one of "white", "dark", "whitegrid", "darkgrid", "ticks"
#   palette     -> colour palette
#   color_codes -> colour codes
theme_options = dict(style='whitegrid', palette='muted', color_codes=True)
sns.set(**theme_options)
plt.plot(np.arange(10))
plt.xlim(0, 10)
plt.ylim(0, 10)

sns.set_style() → 切換seaborn圖表風格

→ 單獨改變seaborn的主題樣式，seaborn有5種預先設計好的主題樣式：

darkgrid(默認使用該主題樣式)
dark
whitegrid
white
ticks

# Change only the theme; ordinary matplotlib calls and arguments keep working
sns.set_style('dark')
plt.scatter(years, gdp)

sns.despine() → 設置坐標軸

→ white、ticks主題有4個坐標軸，可以用sns.despine()將右側和頂部的坐標軸去除

seaborn.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)

offset，設置與坐標軸之間的偏移
trim，為True時，將坐標軸限制在數據最大最小值

sns.set_style('ticks')
plt.plot(np.random.rand(10))
# With no arguments, despine() removes the top and right spines
sns.despine()

plt.plot(np.random.rand(20), color='r')
# offset: distance the spines are moved away from the axes
# trim:   when True, limit each spine to the min/max range
spine_options = {'offset': 10, 'trim': True}
sns.despine(**spine_options)

axes_style() → 設置局部圖表風格

單獨設置某個子圖的風格

# axes_style() can be used as a context manager: the style applies only to
# axes created inside the `with` block
with sns.axes_style("darkgrid"):
    plt.subplot(2, 1, 1)
    plt.plot(np.random.rand(20))

# Style for everything created outside the `with` block
sns.set_style("whitegrid")
plt.subplot(2, 1, 2)
plt.plot(np.random.rand(20))
sns.despine()

set_context() → 設置顯示比例尺度

→ set_context()的選擇包括：'paper', 'notebook', 'talk', 'poster',默認為notebook

# Scale plot elements for the 'talk' context
sns.set_context('talk')
plt.plot(np.random.rand(20))
plt.xlim(0, 19)
plt.ylim(0, 1)

color_palette() → 設置調(diào)色盤

→ 對圖表整體顏色、比例等進行風格設置，包括顏色色板等，調用系統風格進行數據可視化

color_palette()默認6種顏色：deep, muted, pastel, bright, dark, colorblind

seaborn.color_palette(palette=None, n_colors=None, desat=None)

其他顏色風格
風格內容：Accent, Accent_r, Blues, Blues_r, BrBG, BrBG_r, BuGn, BuGn_r, BuPu,
BuPu_r, CMRmap, CMRmap_r, Dark2, Dark2_r, GnBu, GnBu_r, Greens, Greens_r, Greys, Greys_r, OrRd, OrRd_r, Oranges, Oranges_r, PRGn, PRGn_r,
Paired, Paired_r, Pastel1, Pastel1_r, Pastel2, Pastel2_r, PiYG, PiYG_r, PuBu, PuBuGn, PuBuGn_r, PuBu_r, PuOr, PuOr_r, PuRd, PuRd_r, Purples,
Purples_r, RdBu, RdBu_r, RdGy, RdGy_r, RdPu, RdPu_r, RdYlBu, RdYlBu_r, RdYlGn, RdYlGn_r, Reds, Reds_r, Set1, Set1_r, Set2, Set2_r, Set3

# Activate the palette before plotting. color_palette() only *returns* the
# colours; set_palette() is the call that installs them as the colour cycle.
sns.set_palette('icefire_r')
sns.set_context('notebook')

def sinplot(flip=1):
    """Plot six phase-shifted sine curves of decreasing amplitude.

    flip: factor multiplied into every curve (defaults to 1).
    """
    xs = np.linspace(0, 14, 100)
    # Curve k is shifted by k * 0.5 and scaled by (7 - k), i.e. 6 down to 1
    for k, amplitude in enumerate(range(6, 0, -1), start=1):
        plt.plot(xs, np.sin(xs + k * .5) * amplitude * flip)


sinplot()
plt.xlim(0, 14)
plt.ylim(-6, 6)

# 繪制系列顏色

分布數(shù)據(jù)可視化

直方圖 → sns.distplot()

→ sns.distplot('a', 'bins=None', 'hist=True', 'kde=True', 'rug=False', 'fit=None', 'hist_kws=None', 'kde_kws=None', 'rug_kws=None', 'fit_kws=None', 'color=None', 'vertical=False', 'norm_hist=False', 'axlabel=None', 'label=None', 'ax=None')

bins 箱數
hist、kde 是否顯示箱/密度曲線 (默認情況下hist=True,kde=True,直方圖和密度圖同時顯示)
norm_hist 直方圖是否按照密度來顯示
rug 是否顯示數據分布情況
vertical 是否水平顯示
color 設置顏色
label 圖例
axlabel x軸標注
hist_kws 設置箱子的風格、線寬、透明度、顏色；風格(histtype)包括：'bar', 'barstacked', 'step', 'stepfilled'
kde_kws 設置數據密度曲線顏色、線寬、標注、線形
rug_kws 設置數據頻率的相關參數

sns.set_style("darkgrid")
sns.set_context("paper")

# Reproducible sample: 100 normal draws scaled by 100
rs = np.random.RandomState(10)
s = pd.Series(rs.randn(100) * 100)

fig, ax = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: histogram with KDE overlay (hist=True and kde=True are the defaults)
ax1 = ax[0, 0]
sns.distplot(s, bins=20, ax=ax1, color='g', kde_kws={'color': 'r'}, label='distplot')
ax1.legend()
ax1.set_title('hist=True,kde=True', fontsize=16, pad=12)

# Top-right: histogram only, with a custom x-axis label
ax2 = ax[0, 1]
sns.distplot(s, bins=20, hist=True, kde=False, axlabel='xxx', ax=ax2)
ax2.set_title('hist=True,kde=False', fontsize=16, pad=12)

# Bottom-left: histogram + KDE + rug marks
ax3 = ax[1, 0]
sns.distplot(s, bins=20, rug=True, ax=ax3, color='y', rug_kws={'color': 'blue'})
ax3.set_title('hist=True,kde=True,rug=True', fontsize=16, pad=12)

# Bottom-right: per-layer styling through rug_kws / kde_kws / hist_kws.
# ax=ax4 is passed explicitly so the plot does not depend on which axes
# happens to be "current".
ax4 = ax[1, 1]
sns.distplot(
    s,
    rug=True,
    ax=ax4,
    rug_kws={'color': 'g'},
    kde_kws={"color": "k", "lw": 1, "label": "KDE", 'linestyle': '--'},
    # histtype may be 'bar', 'barstacked', 'step' or 'stepfilled'
    hist_kws={"histtype": "step", "linewidth": 1, "alpha": 1, "color": "g"},
)
ax4.set_title('rug_kws,kde_kws,hist_kws', fontsize=16, pad=12)

plt.subplots_adjust(wspace=0.2, hspace=0.4)  # spacing between subplots

密度圖 → sns.kdeplot()

→ sns.kdeplot('data', 'data2=None', 'shade=False', 'vertical=False', "kernel='gau'", "bw='scott'", 'gridsize=100', 'cut=3', 'clip=None', 'legend=True', 'cumulative=False', 'shade_lowest=True', 'cbar=False', 'cbar_ax=None', 'cbar_kws=None', 'ax=None',)

bw 控制擬合的程度，類似直方圖的箱數
shade，是否填充
vertical，是否水平
cbar，是否顯示顏色圖例
cmap = 'Reds', 設置調色盤
shade_lowest=False, 最外圍顏色是否顯示

單個樣本密度分布情況

# Sample data: 100 normal draws scaled by 100, fixed seed
rs = np.random.RandomState(10)
s = pd.Series(rs.randn(100) * 100)

# Three density curves of the same sample with different settings
plt.figure(figsize=(8, 6))
sns.kdeplot(s, shade=False, color='r', linewidth=1.2)
sns.kdeplot(s, bw=10)
sns.kdeplot(s, bw=5, color='g')

# Rug plot of the individual observations
sns.rugplot(s, height=0.1, color='black')

兩個樣本數(shù)據(jù)密度分布圖

→ 兩個維度數據生成曲線密度圖，以顏色作為密度衰減顯示

# Sample data: 100 rows of two normal variables, fixed seed
rs = np.random.RandomState(2)
df = pd.DataFrame(rs.randn(100, 2), columns=['A', 'B'])

fig, ax = plt.subplots(2, 2, figsize=(14, 12))

# Top-left: filled bivariate density, lowest contour level left unshaded
ax1 = ax[0, 0]
sns.kdeplot(df['A'], df['B'], cmap='Reds', cbar=True, shade=True, ax=ax1,
            shade_lowest=False, n_levels=20)
ax1.set_title('shade_lowest=False', fontsize=16, pad=12)

# Top-right: same data, lowest contour level shaded as well
#   shade_lowest -> whether the outermost level is coloured
#   n_levels     -> number of contour levels (more levels look smoother)
ax2 = ax[0, 1]
sns.kdeplot(df['A'], df['B'], cmap='Blues', cbar=True, shade=True, ax=ax2,
            shade_lowest=True, n_levels=10)
ax2.set_title('shade_lowest=True', fontsize=16, pad=12)

# Rug plots of each variable along the x and y axes of the first panel
sns.rugplot(df['A'], color="g", axis='x', alpha=0.5, ax=ax1)
sns.rugplot(df['B'], color="r", axis='y', alpha=0.5, ax=ax1)

# Several densities on one axes: two samples shifted by +2 and -2
rs1 = np.random.RandomState(2)
rs2 = np.random.RandomState(5)
df1 = pd.DataFrame(rs1.randn(100, 2) + 2, columns=['A', 'B'])
df2 = pd.DataFrame(rs2.randn(100, 2) - 2, columns=['A', 'B'])

# The keyword is n_levels; the original spelling "n_levelss" was a typo.
ax3 = ax[1, 0]
sns.kdeplot(df1['A'], df1['B'], ax=ax3, cmap='Greens', shade=True,
            n_levels=10, shade_lowest=False)
sns.kdeplot(df2['A'], df2['B'], ax=ax3, cmap='RdBu', shade=True,
            n_levels=10, shade_lowest=False)

ax4 = ax[1, 1]
sns.kdeplot(df1['A'], df1['B'], ax=ax4, cmap='Reds_r', shade=True,
            n_levels=10, shade_lowest=False, cbar=True)
sns.kdeplot(df2['A'], df2['B'], ax=ax4, cmap='Blues', shade=True,
            n_levels=10, shade_lowest=False, cbar=True)
fig.tight_layout(pad=1)  # overall padding around the figure; the default is pad=1.08

散點圖

綜合散點圖 → sns.jointplot()

→ 一個多面板圖，不僅能顯示兩個變量的關系，還可顯示每個單變量的分布情況

→ sns.jointplot('x', 'y', 'data=None', "kind='scatter'", 'stat_func=None', 'color=None', 'height=6', 'ratio=5', 'space=0.2', 'dropna=True', 'xlim=None', 'ylim=None', 'joint_kws=None', 'marginal_kws=None', 'annot_kws=None')

x,y 設置xy軸，顯示columns名稱
data 設置數據
color 設置顏色
s 設置散點大小(只針對scatter）
kind 設置類型：“scatter”、“reg”、“resid”、“kde”、“hex”
space 設置散點圖和頂部右側的布局圖的間距
height（舊版本為size） 圖表大小（自動調整為正方形）
ratio 散點圖與布局圖高度比，整型
marginal_kws=dict(bins=15, rug=True) 設置柱狀圖箱數，是否設置rug

rs = np.random.RandomState(2)
df = pd.DataFrame(rs.randn(200, 2), columns=['A', 'B'])

# Scatter plot with marginal histograms.
# Columns are referenced by name through `data=`, and the figure size uses
# `height=` (the older `size=` keyword is deprecated).
# marginal_kws sets the marginal plots' bin count and whether to draw a rug.
sns.jointplot(x='A', y='B', data=df, space=0.2, height=8, ratio=4,
              marginal_kws=dict(bins=20, rug=True, color='r'))

# Hexbin joint plot with marginal histograms, on a fresh 500-row sample
df = pd.DataFrame(rs.randn(500, 2), columns=['A', 'B'])
sns.jointplot(x='A', y='B', data=df, kind="hex", color="blue",
              marginal_kws=dict(bins=20), height=8)

綜合散點圖 → sns.JointGrid()

→ sns.JointGrid()是可拆分繪制的散點圖，利用plot_joint() + ax_marg_x.hist() + ax_marg_y.hist() 散點圖+直方圖

plot_joint() 設置內圖表
ax_marg_x.hist() 設置x軸的布局圖
ax_marg_y.hist() 設置y軸的布局圖

# Load one of seaborn's bundled example datasets
tips = sns.load_dataset('tips')
tips.head()

sns.set_style('white')
# Create the grid and bind x / y to columns of `tips`.
# `height=` replaces the deprecated `size=` keyword.
g = sns.JointGrid(x='total_bill', y='tip', data=tips, height=8)
g.plot_joint(plt.scatter, color='r')  # central (joint) plot
# Marginal histograms drawn directly on the marginal axes
g.ax_marg_x.hist(tips['total_bill'], alpha=0.8, bins=20)
g.ax_marg_y.hist(tips['tip'], color='r', alpha=0.8, bins=20, orientation='horizontal')

from scipy import stats
# Annotate with a statistic; pearsonr or spearmanr can be used.
# NOTE(review): JointGrid.annotate is deprecated in newer seaborn releases -
# confirm it exists in the installed version.
g.annotate(stats.pearsonr)

→ sns.JointGrid()是可拆分繪制的散點圖，利用plot_joint() + plot_marginals() 散點圖+直方圖+密度圖

plot_marginals() 直接在x，y軸繪制局部圖

# Scatter joint plot with histogram + KDE marginals.
# `height=` replaces the deprecated `size=` keyword.
g = sns.JointGrid(x='total_bill', y='tip', data=tips, height=8)
g = g.plot_joint(plt.scatter, color='g', s=40, edgecolor='white')
g.plot_marginals(sns.distplot, kde=True, color='g')

# Density joint plot with density marginals.
# The contour-count keyword is n_levels; the original "n_level" was a typo.
g = sns.JointGrid(x='total_bill', y='tip', data=tips, height=8)
g = g.plot_joint(sns.kdeplot, cmap='Reds', n_levels=20, shade=True)
g.plot_marginals(sns.kdeplot, shade=True, color='r')

矩陣散點圖 → sns.pairplot()

→ sns.pairplot()只對數值類型的列有效，其創建一個軸矩陣，以此顯示DataFrame中每兩列的關系，在對角上為單變量的分布情況

sns.pairplot('data', 'hue=None', 'hue_order=None', 'palette=None', 'vars=None', 'x_vars=None', 'y_vars=None', "kind='scatter'", "diag_kind='auto'", 'markers=None', 'height=2.5', 'aspect=1', 'dropna=True', 'plot_kws=None', 'diag_kws=None', 'grid_kws=None', 'size=None')

kind 散點圖/回歸分布圖 {‘scatter’,‘reg’}
diag_kind 對角線圖類型設置，直方圖/密度圖 {‘hist’, ‘kde’}
hue 按照某一字段進行分類
palette 設置調色板
markers 設置不同系列的點樣式（要根據參考分類個數）
height（舊版本為size） 圖表大小
vars 提取局部變量進行對比時設置

iris = sns.load_dataset("iris")
print(iris.head())

# Activate the palette: color_palette() only returns colours, set_palette()
# installs them as the colour cycle.
sns.set_palette('Blues_r')
# One marker per hue level; `height=` replaces the deprecated `size=` keyword.
sns.pairplot(iris, kind='scatter', diag_kind='hist', height=3,
             markers=['o', 'D', 'x'], hue='species')

   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

# Compare only a subset of the columns via `vars`.
# `height=` replaces the deprecated `size=` keyword.
sns.pairplot(iris, vars=['sepal_length', 'sepal_width'], kind='reg',
             diag_kind='kde', palette='husl', hue='species', height=4)

©著作權歸作者所有，轉載或內容合作請聯系作者

人面猴
序言：七十年代末除破，一起剝皮案震驚了整個濱河市躁垛，隨后出現(xiàn)的幾起案子，更是在濱河造成了極大的恐慌，老刑警劉巖，帶你破解...
沈念sama閱讀 221,548評論 6贊 515
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件赫蛇，死亡現(xiàn)場離奇詭異，居然都是意外死亡，警方通過查閱死者的電腦和手機病瞳，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 94,497評論 3贊 399
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門，熙熙樓的掌柜王于貴愁眉苦臉地迎上來旷祸，“玉大人赃绊，你說我怎么就攤上這事么夫。” “怎么了摇予？”我有些...
開封第一講書人閱讀 167,990評論 0贊 360
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵蜕猫，是天一觀的道長匾灶。經(jīng)常有香客問我，道長，這世上最難降的妖魔是什么寻狂？我笑而不...
開封第一講書人閱讀 59,618評論 1贊 296
?港島之戀（遺憾婚禮）
正文為了忘掉前任塘慕，我火速辦了婚禮，結(jié)果婚禮上，老公的妹妹穿的比我還像新娘丢烘。我一直安慰自己忧侧，他們只是感情好，可當我...
茶點故事閱讀 68,618評論 6贊 397
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布。她就那樣靜靜地躺著轨域，像睡著了一般冀续。火紅的嫁衣襯著肌膚如雪肝匆。梳的紋絲不亂的頭發(fā)上度硝，一...
開封第一講書人閱讀 52,246評論 1贊 308
城市分裂傳說
那天，我揣著相機與錄音森枪，去河邊找鬼筋遭。笑死撕蔼，一個胖子當著我的面吹牛，可吹牛的內(nèi)容都是我干的毕荐。我是一名探鬼主播陆爽，決...
沈念sama閱讀 40,819評論 3贊 421
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼飒赃，長吁一口氣：“原來是場噩夢啊……” “哼权薯！你這毒婦竟也來了奄抽？” 一聲冷哼從身側(cè)響起馆匿，我...
開封第一講書人閱讀 39,725評論 0贊 276
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤盆昙，失蹤者是張志新（化名）和其女友劉穎炼团，沒想到半個月后，有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體贸宏，經(jīng)...
沈念sama閱讀 46,268評論 1贊 320
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡鲫咽，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 38,356評論 3贊 340
?白月光啟示錄
正文我和宋清朗相戀三年伶选，在試婚紗的時候發(fā)現(xiàn)自己被綠了己单。大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片件已。...
茶點故事閱讀 40,488評論 1贊 352
活死人
序言：一個原本活蹦亂跳的男人離奇死亡笋额，死狀恐怖，靈堂內(nèi)的尸體忽然破棺而出拨齐，到底是詐尸還是另有隱情鳞陨，我是刑警寧澤，帶...
沈念sama閱讀 36,181評論 5贊 350
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布瞻惋，位于F島的核電站，受9級特大地震影響援岩，放射性物質(zhì)發(fā)生泄漏歼狼。R本人自食惡果不足惜，卻給世界環(huán)境...
茶點故事閱讀 41,862評論 3贊 333
男人毒藥：我在死后第九天來索命
文/蒙蒙一享怀、第九天我趴在偏房一處隱蔽的房頂上張望羽峰。院中可真熱鬧，春花似錦添瓷、人聲如沸梅屉。這莊子的主人今日做“春日...
開封第一講書人閱讀 32,331評論 0贊 24
一樁弒父案鳞贷，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽坯汤。三九已至，卻和暖如春搀愧，著一層夾襖步出監(jiān)牢的瞬間惰聂，已是汗流浹背。一陣腳步聲響...
開封第一講書人閱讀 33,445評論 1贊 272
情欲美人皮
我被黑心中介騙來泰國打工咱筛，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留搓幌，地道東北人。一個月前我還...
沈念sama閱讀 48,897評論 3贊 376
代替公主和親
正文我出身青樓迅箩，卻偏偏與公主長得像溉愁，于是被迫代替她去往敵國和親。傳聞我的和親對象是個殘疾皇子饲趋，可洞房花燭夜當晚...
茶點故事閱讀 45,500評論 2贊 359