# 引入相關(guān)工具庫
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("ggplot")
import warnings
warnings.filterwarnings("ignore")
plt.rcParams['font.sans-serif']=['SimHei','Songti SC','STFangsong']
plt.rcParams['axes.unicode_minus'] = False # 用來正常顯示負(fù)號(hào)
import seaborn as sns
- 頻數(shù)直方圖
x_samples = np.random.randn(1000)
plt.hist(x_samples, bins=10,color='blue',alpha=0.6) # bins=10代表10根柱子
plt.xlabel("x")
plt.ylabel("頻數(shù) n")
plt.title("頻數(shù)直方圖")
plt.show()
- 頻率直方圖
x_samples = np.random.randn(1000)
plt.hist(x_samples, bins=10,color='blue',alpha=0.6,density=True) # bins=10代表10根柱子
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("頻率直方圖")
plt.show()
- 從總體/總體的分布中抽取樣本并計(jì)算樣本均值和計(jì)算偏差
## (1)從總體中抽取樣本
X = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) # 假設(shè)總體為X
x_i = np.random.choice(X, 10, replace=False) # 從總體X中抽取10個(gè)樣本
x_mean = np.mean(x_i) # 計(jì)算樣本均值
x_bias = np.sum(x_i-x_mean) # 計(jì)算偏差和
print("樣本均值為:",x_mean)
print("偏差和為:",x_bias)
## (2)從總體分布中抽取樣本庄涡,假設(shè)總體分布為N(0,1)
x_i = np.random.randn(10) # 從總體分布N(0,1)中抽取10個(gè)樣本
x_mean = np.mean(x_i) # 計(jì)算樣本均值
x_bias = np.sum(x_i-x_mean) # 計(jì)算偏差和
print("樣本均值為:",x_mean)
print("偏差和為:",x_bias)
def GetSampleDist(n, X):
x_mean_list = []
for i in range(n):
x_i = np.random.choice(X, 5, replace=False)
x_mean = np.mean(x_i)
x_mean_list.append(x_mean)
plt.hist(x_mean_list,color='blue',alpha=0.6,density=True)
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("n="+str(n))
plt.show()
X = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) # 假設(shè)總體為X, size=20
GetSampleDist(5, X)
GetSampleDist(10, X)
GetSampleDist(20, X)
GetSampleDist(100, X)
GetSampleDist(1000, X)
GetSampleDist(10000, X)
GetSampleDist(100000, X)
- 從總體/總體的分布中抽取樣本并計(jì)算樣本方差與樣本標(biāo)準(zhǔn)差
## (1)從總體中抽取樣本
X = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) # 假設(shè)總體為X
x_i = np.random.choice(X, 10, replace=False) # 從總體X中抽取10個(gè)樣本
x_sn2 = np.var(x_i,ddof=0) #樣本方差
x_s2 = np.var(x_i,ddof=1) # 無偏樣本方差
x_sn = np.std(x_i,ddof=0) # 樣本標(biāo)準(zhǔn)差
x_s = np.std(x_i,ddof=1) # 無偏樣本標(biāo)準(zhǔn)差
print("樣本方差sn^2為:",x_sn2)
print("樣本方差s^2為:",x_s2)
print("樣本標(biāo)準(zhǔn)差sn^2為:",x_sn)
print("樣本標(biāo)準(zhǔn)差s^2為:",x_s)
## (2)從總體分布中抽取樣本仿畸,假設(shè)總體分布為N(0福稳,1)
x_i = np.random.randn(10) # 從總體分布N(0,1)中抽取10個(gè)樣本
x_sn2 = np.var(x_i,ddof=0) #樣本方差
x_s2 = np.var(x_i,ddof=1) # 無偏樣本方差
x_sn = np.std(x_i,ddof=0) # 樣本標(biāo)準(zhǔn)差
x_s = np.std(x_i,ddof=1) # 無偏樣本標(biāo)準(zhǔn)差
print("樣本方差sn^2為:",x_sn2)
print("樣本方差s^2為:",x_s2)
print("樣本標(biāo)準(zhǔn)差sn^2為:",x_sn)
print("樣本標(biāo)準(zhǔn)差s^2為:",x_s)
x_mean_list=[]
for i in range(10000):
x_i=np.random.randn(10)
x_mean_list.append(np.mean(x_i))
print("標(biāo)準(zhǔn)正態(tài)分布的均值和方差為:",0,1)
print("1000個(gè)樣本均值的樣本均值為:",np.mean(x_mean_list))
print("1000個(gè)樣本均值的樣本方為:",np.var(x_mean_list,ddof=1))
- 最小次序統(tǒng)計(jì)量及其分布
from scipy.stats import rv_discrete # 自定義離散分布
x_k = np.arange(3)+1
p_k = np.array([1/3]*3)
X = rv_discrete(name='min', values=(x_k, p_k))
def Get_Min_Dist(n, X):
Min_list = []
for i in range(n):
min_value = np.min(X.rvs(size=3))
Min_list.append(min_value)
# 統(tǒng)計(jì)每個(gè)值出現(xiàn)的次數(shù)
xk_count = []
for v in x_k:
xk_count.append(np.sum(Min_list==v))
# 畫圖
plt.bar(x_k,np.array(xk_count)/n,color='blue',alpha=0.6)
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.xlim(0,4)
plt.title("n="+str(n))
plt.show()
Get_Min_Dist(5, X)
Get_Min_Dist(10, X)
Get_Min_Dist(20, X)
Get_Min_Dist(100, X)
Get_Min_Dist(1000, X)
Get_Min_Dist(10000, X)
- 最大次序統(tǒng)計(jì)量及其分布
from scipy.stats import rv_discrete # 自定義離散分布
x_k = np.arange(3)+1
p_k = np.array([1/3]*3)
X = rv_discrete(name='min', values=(x_k, p_k))
def Get_Max_Dist(n, X):
Max_list = []
for i in range(n):
max_value = np.max(X.rvs(size=3))
Max_list.append(max_value)
# 統(tǒng)計(jì)每個(gè)值出現(xiàn)的次數(shù)
xk_count = []
for v in x_k:
xk_count.append(np.sum(Max_list==v))
# 畫圖
plt.bar(x_k,np.array(xk_count)/n,color='blue',alpha=0.6)
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.xlim(0,4)
plt.title("n="+str(n))
plt.show()
Get_Max_Dist(5, X)
Get_Max_Dist(10, X)
Get_Max_Dist(20, X)
Get_Max_Dist(100, X)
Get_Max_Dist(1000, X)
Get_Max_Dist(10000, X)
- 從總體/總體的分布中抽取樣本并計(jì)算樣本中位數(shù)
## (1)從總體中抽取樣本
X = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) # 假設(shè)總體為X
x_i = np.random.choice(X, 10, replace=False) # 從總體X中抽取10個(gè)樣本
x_mid = np.median(x_i) # 計(jì)算樣本中位數(shù)
print("樣本中位數(shù)為:",x_mid)
## (2)從總體分布中抽取樣本枢希,假設(shè)總體分布為N(0啡省,1)
x_i = np.random.randn(10) # 從總體分布N(0,1)中抽取10個(gè)樣本
x_mid = np.median(x_i) # 計(jì)算樣本中位數(shù)
print("樣本中位數(shù)為:",x_mid)
- 從總體/總體的分布中抽取樣本并計(jì)算樣本分位數(shù)
## (1)從總體中抽取樣本
X = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) # 假設(shè)總體為X
x_i = np.random.choice(X, 10, replace=False) # 從總體X中抽取10個(gè)樣本
x_low = np.percentile(x_i,25) # 計(jì)算樣本下四分位數(shù)
x_high = np.percentile(x_i,75) # 計(jì)算樣本上四分位數(shù)
print("樣本下四分位數(shù)為:",x_low)
print("樣本上四分位數(shù)為:",x_high)
def GetMidDist(n, X):
x_mid_list = []
for i in range(n):
x_i = np.random.choice(X, 10, replace=False)
x_mid = np.median(x_i)
x_mid_list.append(x_mid)
plt.hist(x_mid_list,color='blue',alpha=0.6,density=True)
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("n="+str(n))
plt.show()
X = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]) # 假設(shè)總體為X, size=20
GetMidDist(5, X)
GetMidDist(10, X)
GetMidDist(20, X)
GetMidDist(100, X)
GetMidDist(1000, X)
GetMidDist(10000, X)
GetMidDist(100000, X)
- 用python畫出不同自由度下的卡方分布的密度函數(shù)圖chi2.pdf(x,df=4)丐枉、chi2.pdf(x,df=6)哆键、chi2.pdf(x,df=10)
# 使用scipy計(jì)算pdf畫圖(非自定義函數(shù))
from scipy.stats import chi2
x = np.linspace(0.01,30,10000)
plt.plot(x, chi2.pdf(x,df=4),'r-', lw=5, alpha=0.6, label='chi2(4)',c='red')
plt.plot(x, chi2.pdf(x,df=6),'r-', lw=5, alpha=0.6, label='chi2(6)',c='blue')
plt.plot(x, chi2.pdf(x,df=10),'r-', lw=5, alpha=0.6, label='chi2(10)',c='orange')
plt.xlabel("X")
plt.ylabel("p (x)")
plt.legend()
plt.show()
- 使用卡方分布的定義演示卡方分布
from scipy.stats import norm
n = 10
chi2_list = []
for i in range(100000):
x_i = norm.rvs(loc=0,scale=1,size=10)
chi2_T = np.sum(np.square(x_i))
chi2_list.append(chi2_T)
sns.distplot(chi2_list,color='blue')
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("n="+str(n))
plt.show()
卡方分布的期望值等于自由度,方差等于兩倍自由度
# 假設(shè)總體為N(0矛洞,1)洼哎,抽樣的樣本容量為n=11,抽樣的次數(shù)為N
from scipy.stats import norm
def S2_Chi2(N):
mu,sig = 0,1
n = 11
T_list = []
for i in range(N):
x_i = norm.rvs(loc=mu,scale=sig,size=n) # 正態(tài)分布總體抽樣
T = (n-1)*np.var(x_i,ddof=1)/sig**2 # 構(gòu)造卡方統(tǒng)計(jì)量
T_list.append(T)
sns.distplot(T_list,color='blue')
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("chi2(10)")
plt.show()
S2_Chi2(100000)
F統(tǒng)計(jì)量與F分布
F是自由度為m與n的F分布
- 使用scipy與matplotlib繪制不同的m,n下的F分布的密度函數(shù)
from scipy.stats import f
x = np.linspace(0.01,5,10000)
plt.plot(x, f.pdf(x,4,4000),'r-', lw=5, alpha=0.6, label='F(4,4000)',c='red')
plt.plot(x, f.pdf(x,4,10),'r-', lw=5, alpha=0.6, label='F(4,10)',c='blue')
plt.plot(x, f.pdf(x,4,4),'r-', lw=5, alpha=0.6, label='F(4,4)',c='orange')
plt.plot(x, f.pdf(x,4,1),'r-', lw=5, alpha=0.6, label='F(4,1)',c='yellow')
plt.xlabel("X")
plt.ylabel("p (x)")
plt.legend()
plt.show()
- 使用F統(tǒng)計(jì)量的定義演示
from scipy.stats import norm
m,n = 4,4000
F_list = []
for i in range(100000):
chi2_m_sample = np.sum(np.square(norm.rvs(loc=0,scale=1,size=m))) # 卡方m統(tǒng)計(jì)量
chi2_n_sample = np.sum(np.square(norm.rvs(loc=0,scale=1,size=n))) # 卡方n統(tǒng)計(jì)量
F_T = (chi2_m_sample/m) / (chi2_n_sample/n) # # F(m沼本,n)統(tǒng)計(jì)量
F_list.append(F_T)
sns.distplot(F_list,color='blue')
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("F(4,4000)")
plt.show()
F分布的密度函數(shù)是一個(gè)只取正值的偏態(tài)分布# x1,x2,...,xn ~ N(0,1), y1,y2,...,yn ~ N(0,4)
from scipy.stats import norm
F_list = []
for i in range(100):
x_mu, x_sigma2 = 0, 1
y_mu, y_sigma2 = 0, 4
norm_xi = norm.rvs(loc=x_mu,scale=x_sigma2,size=5)
norm_yi = norm.rvs(loc=y_mu,scale=y_sigma2,size=5)
sx_2 = np.var(norm_xi,ddof=1)
sy_2 = np.var(norm_yi,ddof=1)
F_T = (sx_2/x_sigma2) / (sy_2/y_sigma2)
F_list.append(F_list)
sns.distplot(F_list,color='red')
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("F(4,4)")
plt.show()
t分布及其統(tǒng)計(jì)量
- 使用scipy與matplotlib繪制不同的n下的t分布的密度函數(shù)
from scipy.stats import t
from scipy.stats import norm
x = np.linspace(-6,6,10000)
plt.plot(x, t.pdf(x,4),'--', lw=5, alpha=0.6, label='t (4)',c='red')
plt.plot(x, norm.pdf(x,loc=0,scale=1),'r-', lw=5, alpha=0.6, label='N (0,1)',c='yellow')
plt.plot(x, t.pdf(x,100),'--', lw=5, alpha=0.6, label='t (100)',c='blue')
plt.xlabel("X")
plt.ylabel("p (x)")
plt.legend()
plt.show()
當(dāng)自由度較大的時(shí)候噩峦,t分布近似于標(biāo)準(zhǔn)正態(tài)分布
from scipy.stats import norm
from scipy.stats import t
t_list = []
for i in range(300000):
mu,sigma2 = 0,1
x_i = norm.rvs(loc=mu, scale=sigma2, size=5)
x_mean = np.mean(x_i)
x_s = np.std(x_i,ddof=1)
t_T = np.sqrt(4)*(x_mean-mu) / x_s
t_list.append(t_T)
sns.distplot(t_list,color='blue',label='t (4)')
x = np.linspace(-6,6,10000)
plt.plot(x, norm.pdf(x,loc=0,scale=1),'r-', lw=5, alpha=0.6, label='N (0,1)',c='yellow')
plt.xlabel("x")
plt.ylabel("頻率 p")
plt.title("t (4)")
plt.xlim(-6,6)
plt.legend()
plt.show()
參數(shù)估計(jì)——點(diǎn)估計(jì)
矩估計(jì)
- 原點(diǎn)矩和中心距
- 假設(shè)總體是標(biāo)準(zhǔn)正態(tài)分布辫红,求3階原點(diǎn)矩和中心矩
from scipy.stats import norm
x_i = norm.rvs(loc=0, scale=1, size=10000)
a3 = np.mean(np.power(x_i,3))
b3 = np.mean(np.power((x_i-np.mean(x_i)), 3))
print("3階原點(diǎn)矩:",a3)
print("3階中心矩:",b3)
- 矩估計(jì)
用樣本矩替代總體矩
# 假設(shè)真實(shí)值lambda = 5
from scipy.stats import expon
real_lmd = 5
x_i = np.random.exponential(scale=1/real_lmd, size=1000)
print("矩估計(jì)為:",1/np.mean(x_i))
極大似然估計(jì)
# 使用sympy演示極大似然估計(jì)的案例
from sympy import *
p = Symbol('p') #定義總體參數(shù)
P_p = p**7*(1-p)**3 # 定義似然函數(shù)
lnP_p = ln(P_p) # 化簡(jiǎn)為對(duì)數(shù)似然
d_ln_P = diff(lnP_p, p) # 求導(dǎo)函數(shù)
p_hat = solve(d_ln_P, p) # 導(dǎo)函數(shù)為0
print("p的極大似然估計(jì)為:",p_hat)
- 指數(shù)函數(shù)中l(wèi)amda的極大似然估計(jì)
from sympy.abc import lamda
x_1,x_2,x_3,x_4,x_5 = symbols('x_1:6') # 定義多個(gè)樣本變量
x_1,x_2,x_3,x_4,x_5 = 2, 3, 0.5, 5, 2
f_lmd = lamda*E**(-lamda*x_1) * lamda*E**(-lamda*x_2) * lamda*E**(-lamda*x_3) * lamda*E**(-lamda*x_4) * lamda*E**(-lamda*x_5) # 定義似然函數(shù)
ln_f_lmd = ln(f_lmd) # 定義對(duì)數(shù)似然函數(shù)
d_ln_f = diff(ln_f_lmd, lamda) # 求導(dǎo)
lmd_hat = solve(d_ln_f, lamda) # 導(dǎo)數(shù)為0
print("指數(shù)分布參數(shù)lamda的極大似然估計(jì)值為:",lmd_hat)
print("指數(shù)分布參數(shù)lamda的極大似然公式求解為:", 5 / (x_1+x_2+x_3+x_4+x_5))
無偏性與有效性(方差最衅就俊)
區(qū)間估計(jì)
bootstrap就是在樣本中重抽樣名惩,每抽樣一次計(jì)算一次統(tǒng)計(jì)量T
澎胡,這樣就可以構(gòu)造一個(gè)抽樣分布,取抽樣分布的a/2分位數(shù)點(diǎn)和1-a/2分位數(shù)點(diǎn)作為區(qū)間左右邊界就可
# 使用bootstrap方法計(jì)算N(0,1)的mu的置信區(qū)間:樣本量為1000娩鹉,重抽樣樣本量為500, 重抽樣的次數(shù)為100000次
T_list = []
N, N_re = 1000, 500
total_times = 10000
alpha = 0.05
x_i = np.random.randn(N) # 抽樣1000個(gè)
for i in range(total_times):
x_re = np.random.choice(x_i, N_re, replace=True) # 從樣本中重抽樣
T = np.mean(x_re)
T_list.append(T)
left = np.percentile(np.array(T_list), 100*alpha/2)
right = np.percentile(np.array(T_list), 100*(1-alpha/2))
print("正態(tài)總體的mu的置信區(qū)間為:["+str(left)+", "+str(right)+"]")
- 置信區(qū)間的置信水平是什么意思攻谁??弯予?
# 探索置信度1-alpha的含義:
def get_confident_interval(x_i):
T_list = []
N, N_re = 1000, 500
total_times = 10000
alpha = 0.05
x_i = np.random.randn(N)
for i in range(total_times):
x_re = np.random.choice(x_i, N_re, replace=True) # 從樣本中重抽樣
T = np.mean(x_re)
T_list.append(T)
left = np.percentile(np.array(T_list), 100*alpha/2)
right = np.percentile(np.array(T_list), 100*(1-alpha/2))
return {'left':left, 'right':right}
left_right_list = [] #100個(gè)置信區(qū)間的列表
for i in range(100):
x_i = np.random.randn(1000) #每次抽樣1000個(gè)樣本
T_i = get_confident_interval(x_i)
left_right_list.append(T_i)
for i in range(len(left_right_list)):
plt.vlines(x=i+1, ymin=left_right_list[i]['left'], ymax=left_right_list[i]['right'])
plt.scatter(np.array([i+1]*2),np.array([left_right_list[i]['left'],left_right_list[i]['right']]),color='blue')
plt.axhline(y=0, ls='--', color='b')
plt.show()
假設(shè)檢驗(yàn)之基本思想
- 步驟(建立原假設(shè)與備擇假設(shè)——選擇統(tǒng)計(jì)量并給出拒絕域的形式——選擇顯著性水平a——給出拒絕域W)
# python模擬以上的假設(shè)檢驗(yàn):
from scipy.stats import norm
x_i = np.array([1.0, 0.85, 0.90, 0.98, 0.96, 1.0, 0.97, 0.98, 0.98, 0.98]) # 從當(dāng)天生產(chǎn)的肉類總體中抽樣10個(gè)
x_mean = np.mean(x_i) # 構(gòu)造樣本均值統(tǒng)計(jì)量
c = norm(loc=1, scale=np.sqrt(1/10)).ppf(0.05) # 總體N(theta, 1)戚宦,則樣本均值的抽樣分布為N(theta, 1/n),n為樣本量
print("樣本均值為:",x_mean)
print("拒絕域?yàn)椋海?inf锈嫩, "+str(c)+"]")
print("x_mean位于接受域受楼,接受原假設(shè)!")
假設(shè)檢驗(yàn)之正態(tài)總體參數(shù)的假設(shè)檢驗(yàn)
- 方差已知時(shí)的u檢驗(yàn)
# 使用python模擬u檢驗(yàn):
from scipy.stats import norm
mu_0, sigma= 65, 0.4
x_i = np.array([60, 72, 67, 89, 90, 100, 67, 76, 87, 78]) #樣本
u_T = (np.mean(x_i) - mu) / (sigma / np.sqrt(len(x_i))) # 計(jì)算樣本統(tǒng)計(jì)量
left = norm(loc=0, scale=1).ppf(0.025)
right = norm(loc=0, scale=1).ppf(0.975)
print("拒絕域?yàn)椋篬"+str(left)+","+str(right)+"]")
print("u統(tǒng)計(jì)量為:",u_T)
print("u_T位于拒絕域呼寸,拒絕原假設(shè)艳汽!")
- 方差未知時(shí)的t檢驗(yàn)
# 使用python模擬單樣本t檢驗(yàn)
from scipy.stats import t
mu_0 = 250
x_i = np.array([248.8, 249.2, 250.7, 251.2, 248.0, 253.0, 248.9, 250.2, 251.2, 249.2]) # 樣本
t_T = (np.mean(x_i)-mu_0)/(np.std(x_i, ddof=1)/np.sqrt(len(x_i)-1)) # 樣本統(tǒng)計(jì)量
right = t(len(x_i)-1).ppf(0.05)
print("拒絕域?yàn)椋?-inf, "+str(right)+"]")
print("t統(tǒng)計(jì)量為:",t_T)
print("t_T位于接受域,不能拒絕原假設(shè)等舔!")
# 使用scipy進(jìn)行單樣本t檢驗(yàn)
from scipy.stats import ttest_1samp
x_i = np.array([248.8, 249.2, 250.7, 251.2, 248.0, 253.0, 248.9, 250.2, 251.2, 249.2]) # 樣本
t_T, p_value = ttest_1samp(x_i, popmean=250, alternative="less") # popmean:總體的期望骚灸, alternative={‘two-sided’(雙側(cè)), ‘less’(左側(cè)), ‘greater’(右側(cè))} ; 返回:t統(tǒng)計(jì)量和p值
print("p = ", p_value)
print("由于p>0.05慌植,不能拒絕原假設(shè)甚牲!")
- 兩個(gè)正態(tài)總體的均值差檢驗(yàn)(方差已知)
# 使用python進(jìn)行兩個(gè)正態(tài)總體的均值差檢驗(yàn)
from scipy.stats import norm
sigma2_1 = sigma2_2 = 0.4
x_i = np.array([2.2, 3.1, 2.5, 2.8, 3.0, 5.0, 4.3, 1.8, 0.2, 1.0]) # 修復(fù)前的樣本
y_i = np.array([3.2, 2.8, 2.2, 1.4, 4.4, 3.4, 6.4, 4.0, 3.9, 2.1]) # 修復(fù)后的樣本
m, n = len(x_i), len(y_i) # 樣本數(shù)
u_T = (np.mean(x_i) - np.mean(y_i)) / np.sqrt(sigma2_1/m + sigma2_2/n) #計(jì)算檢驗(yàn)統(tǒng)計(jì)量
left = norm(loc=0, scale=1).ppf(0.025) #拒絕域左邊界
right = norm(loc=0, scale=1).ppf(0.975) # 拒絕域右邊界
print("樣本統(tǒng)計(jì)量為:", u_T)
print("拒絕域?yàn)椋?-inf, "+str(left)+"]與["+str(right)+", +inf)")
print("樣本統(tǒng)計(jì)量位于拒絕域义郑,拒絕原假設(shè)!")
- 方差未知時(shí)的兩樣本t檢驗(yàn)
# 使用python的scipy進(jìn)行方差未知時(shí)丈钙,兩個(gè)正態(tài)總體的均值差檢驗(yàn)
from scipy.stats import ttest_ind
x_i = np.array([2.2, 3.1, 2.5, 2.8, 3.0, 5.0, 4.3, 1.8, 0.2, 1.0]) # 修復(fù)前的樣本
y_i = np.array([3.2, 2.8, 2.2, 1.4, 4.4, 3.4, 6.4, 4.0, 3.9, 2.1]) # 修復(fù)后的樣本
m, n = len(x_i), len(y_i) # 樣本數(shù)
t_T, p_value = ttest_ind(x_i, y_i, equal_var=False, alternative='two-sided') # equal_var=True默認(rèn)同方差非驮, alternative={‘two-sided’(雙側(cè)), ‘less’(左側(cè)), ‘greater’(右側(cè))} ; 返回:t統(tǒng)計(jì)量和p值
print("p值為:",p_value)
print("p>0.05, 則不能拒絕原假設(shè)雏赦!")
- 單個(gè)正態(tài)總體方差的檢驗(yàn)
# 使用python進(jìn)行單個(gè)正態(tài)總體的方差檢驗(yàn)
from scipy.stats import chi2
sigma0_2 = 2.0
x_i = np.array([23.3, 24.7, 23.1, 19.4, 18.9, 20.3, 24.9, 30.2, 19.0, 20.9]) # 樣本
n = len(x_i) # 樣本量
chi_T = (n-1)*np.std(x_i, ddof=1) / sigma0_2 #檢驗(yàn)統(tǒng)計(jì)量
W_right = chi2(df = n-1).ppf(0.95) #拒絕域的邊界
print("檢驗(yàn)統(tǒng)計(jì)量為:",chi_T)
print("拒絕域?yàn)椋篬"+str(W_right)+", +inf)")
print("統(tǒng)計(jì)量位于接受域劫笙,不能拒絕原假設(shè)!")
- 兩個(gè)正態(tài)總體方差比的 F檢驗(yàn)
# 使用python做兩個(gè)正態(tài)總體方差比的 $F$ 檢驗(yàn):
from scipy.stats import f
x_i = np.array([23.3, 24.7, 23.1, 19.4, 18.9, 20.3, 24.9, 30.2, 19.0, 20.9]) # 樣本X
y_i = np.array([34.1, 43.2, 31.1, 12.4, 36.9, 21.3, 39.9, 20.2, 39.0, 12.2]) # 樣本Y
m,n = len(x_i), len(y_i) # 樣本量
F_T = np.std(x_i, ddof=1) / np.std(y_i, ddof=1) #檢驗(yàn)統(tǒng)計(jì)量
W_left = f(dfn=m-1, dfd=n-1).ppf(0.05)
print("統(tǒng)計(jì)量為:", F_T)
print("拒絕域?yàn)椋?0, "+str(W_left)+"]")
print("統(tǒng)計(jì)量位于拒絕域星岗,拒絕原假設(shè)填大!")
假設(shè)檢驗(yàn)之似然比檢驗(yàn)與Bootstrap方法
- 似然比檢驗(yàn)
# 使用Bootstrap做假設(shè)檢驗(yàn)
mu_0 = 250
x_i = np.array([248.8, 249.2, 250.7, 251.2, 248.0, 253.0, 248.9, 250.2, 251.2, 249.2]) # 樣本
x_bar_T = np.mean(x_i) #計(jì)算檢驗(yàn)統(tǒng)計(jì)量
## Bootstrap重抽樣構(gòu)造統(tǒng)計(jì)量的抽樣分布過程
T_list = []
N = 10000 # 重抽樣次數(shù)
for i in range(N):
x_i_resample = np.random.choice(x_i, len(x_i), replace=True)
x_bar_resample = np.mean(x_i_resample)
T_list.append(x_bar_resample)
right = np.percentile(T_list, 5)
print("檢驗(yàn)統(tǒng)計(jì)量為:",x_bar_T)
print("拒絕域?yàn)椋?-inf, "+str(right)+"]")
print("檢驗(yàn)統(tǒng)計(jì)量位于接受域允华,不能拒絕原假設(shè)!")
# t檢驗(yàn)解決以上問題
from scipy.stats import t
mu_0 = 250
x_i = np.array([248.8, 249.2, 250.7, 251.2, 248.0, 253.0, 248.9, 250.2, 251.2, 249.2]) # 樣本
t_T = (np.mean(x_i)-mu_0)/(np.std(x_i, ddof=1)/np.sqrt(len(x_i)-1)) # 樣本統(tǒng)計(jì)量
right = t(len(x_i)-1).ppf(0.05)
print("拒絕域?yàn)椋?-inf, "+str(right)+"]")
print("t統(tǒng)計(jì)量為:",t_T)
print("t_T位于接受域寥掐,不能拒絕原假設(shè)靴寂!")