from __future__ import division, print_function
import math
from sklearn import datasets
import numpy as np
from mlfromscratch.utils import normalize, euclidean_distance, calculate_covariance_matrix
from mlfromscratch.utils import Plot
class GaussianMixtureModel():
    """A probabilistic clustering method for determining groupings among data samples.

    The model is fitted with expectation-maximization (EM): the E-step
    computes, for every sample, the posterior probability (responsibility)
    of each Gaussian component, and the M-step re-estimates every
    component's mean, covariance and prior from those responsibilities.

    Parameters:
    -----------
    k: int
        The number of clusters the algorithm will form.
    max_iterations: int
        The number of iterations the algorithm will run for if it does
        not converge before that.
    tolerance: float
        If the difference of the results from one iteration to the next is
        smaller than this value we will say that the algorithm has converged.
    """

    def __init__(self, k=2, max_iterations=2000, tolerance=1e-8):
        self.k = k
        # One {"mean": ..., "cov": ...} dict per component.
        self.parameters = []
        self.max_iterations = max_iterations
        self.tolerance = tolerance
        # Per-iteration history of each sample's largest responsibility;
        # consumed by the convergence check.
        self.responsibilities = []
        self.sample_assignments = None
        self.responsibility = None

    def _init_random_gaussians(self, X):
        """ Initialize the k gaussians randomly.

        Every component gets a uniform prior, a mean equal to a randomly
        drawn sample of X and the covariance matrix of the whole data set.
        """
        n_samples = np.shape(X)[0]
        self.priors = (1 / self.k) * np.ones(self.k)
        # Start from a clean slate: appending to a list left over from a
        # previous run would leave the stale components at indices
        # 0..k-1, which are the only ones the EM steps ever read.
        self.parameters = []
        for _ in range(self.k):
            params = {}
            params["mean"] = X[np.random.choice(range(n_samples))]
            params["cov"] = calculate_covariance_matrix(X)
            self.parameters.append(params)

    def multivariate_gaussian(self, X, params):
        """ Likelihood of every sample in X under one gaussian.

        Parameters:
        -----------
        X: array of shape (n_samples, n_features)
        params: dict
            Holds the component's "mean" vector and "cov" matrix.

        Returns:
        --------
        Array of shape (n_samples,) with the density value of each sample.

        Raises:
        -------
        ZeroDivisionError if the determinant of the covariance is zero and
        ValueError if it is negative (the normalisation constant needs the
        square root of the determinant).
        """
        n_features = np.shape(X)[1]
        mean = params["mean"]
        covar = params["cov"]
        determinant = np.linalg.det(covar)
        # The normalisation constant and the (pseudo-)inverse depend only
        # on the component, not on the sample, so compute them once.
        coeff = (1.0 / (math.pow((2.0 * math.pi), n_features / 2.0)
                        * math.sqrt(determinant)))
        inv_covar = np.linalg.pinv(covar)
        centered = np.asarray(X) - mean
        # Row-wise quadratic form (x - mean)^T * inv_covar * (x - mean).
        quad_form = (centered.dot(inv_covar) * centered).sum(axis=1)
        return coeff * np.exp(-0.5 * quad_form)

    def _get_likelihoods(self, X):
        """ Calculate the likelihood over all samples.

        Returns an (n_samples, k) array whose column i holds the
        likelihoods of the samples under component i.
        """
        n_samples = np.shape(X)[0]
        likelihoods = np.zeros((n_samples, self.k))
        for i in range(self.k):
            likelihoods[:, i] = self.multivariate_gaussian(
                X, self.parameters[i])
        return likelihoods

    def _expectation(self, X):
        """ Calculate the responsibility (E-step).

        Updates self.responsibility and self.sample_assignments and appends
        each sample's largest responsibility to self.responsibilities.
        """
        # Calculate probabilities of X belonging to the different clusters
        weighted_likelihoods = self._get_likelihoods(X) * self.priors
        sum_likelihoods = np.expand_dims(
            np.sum(weighted_likelihoods, axis=1), axis=1)
        # Determine responsibility as P(X|y)*P(y)/P(X)
        self.responsibility = weighted_likelihoods / sum_likelihoods
        # Assign samples to cluster that has largest probability
        self.sample_assignments = self.responsibility.argmax(axis=1)
        # Save value for convergence check
        self.responsibilities.append(np.max(self.responsibility, axis=1))

    def _maximization(self, X):
        """ Update the parameters and priors (M-step) """
        # Iterate through clusters and recalculate mean and covariance
        for i in range(self.k):
            resp = np.expand_dims(self.responsibility[:, i], axis=1)
            resp_total = resp.sum()
            mean = (resp * X).sum(axis=0) / resp_total
            centered = X - mean
            covariance = centered.T.dot(centered * resp) / resp_total
            self.parameters[i]["mean"] = mean
            self.parameters[i]["cov"] = covariance
        # Update weights
        n_samples = np.shape(X)[0]
        self.priors = self.responsibility.sum(axis=0) / n_samples

    def _converged(self, X):
        """ Convergence if || responsibilities - last_responsibilities || <= tolerance

        X is not used; the check only looks at the two most recent entries
        of self.responsibilities. Returns False until two entries exist.
        """
        if len(self.responsibilities) < 2:
            return False
        diff = np.linalg.norm(
            self.responsibilities[-1] - self.responsibilities[-2])
        return diff <= self.tolerance

    def predict(self, X):
        """ Run GMM and return the cluster indices.

        Every call fits the mixture from a fresh random initialisation;
        nothing is carried over from an earlier call on the same instance.

        Returns an array of shape (n_samples,) holding, for every sample,
        the index of the component with the largest responsibility.
        """
        # Drop the convergence history of any previous run so that the
        # first convergence check is not made against unrelated data.
        self.responsibilities = []
        # Initialize the gaussians randomly
        self._init_random_gaussians(X)
        # Run EM until convergence or for max iterations
        for _ in range(self.max_iterations):
            self._expectation(X)    # E-step
            self._maximization(X)   # M-step
            # Check convergence
            if self._converged(X):
                break
        # Make new assignments and return them
        self._expectation(X)
        return self.sample_assignments
[Machine Learning From Scratch]-unsupervised_learning-gaussian_mixture_model
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/不壞的土叔 我叫張陵循榆,是天一觀的道長(zhǎng)。 經(jīng)常有香客問我墨坚,道長(zhǎng)秧饮,這世上最難降的妖魔是什么? 我笑而不...
- 正文 為了忘掉前任泽篮,我火速辦了婚禮盗尸,結(jié)果婚禮上,老公的妹妹穿的比我還像新娘帽撑。我一直安慰自己泼各,他們只是感情好,可當(dāng)我...
- 文/花漫 我一把揭開白布亏拉。 她就那樣靜靜地躺著扣蜻,像睡著了一般。 火紅的嫁衣襯著肌膚如雪及塘。 梳的紋絲不亂的頭發(fā)上莽使,一...
- 文/蒼蘭香墨 我猛地睜開眼槽驶,長(zhǎng)吁一口氣:“原來是場(chǎng)噩夢(mèng)啊……” “哼!你這毒婦竟也來了鸳兽?” 一聲冷哼從身側(cè)響起掂铐,我...
- 序言:老撾萬榮一對(duì)情侶失蹤,失蹤者是張志新(化名)和其女友劉穎,沒想到半個(gè)月后全陨,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體爆班,經(jīng)...
- 正文 獨(dú)居荒郊野嶺守林人離奇死亡,尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
- 正文 我和宋清朗相戀三年辱姨,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了柿菩。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
- 正文 年R本政府宣布,位于F島的核電站蚯根,受9級(jí)特大地震影響后众,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜颅拦,卻給世界環(huán)境...
- 文/蒙蒙 一蒂誉、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧距帅,春花似錦右锨、人聲如沸。這莊子的主人今日做“春日...
- 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至哮肚,卻和暖如春登夫,著一層夾襖步出監(jiān)牢的瞬間,已是汗流浹背允趟。 一陣腳步聲響...
- 正文 我出身青樓涣楷,卻偏偏與公主長(zhǎng)得像,于是被迫代替她去往敵國(guó)和親抗碰。 傳聞我的和親對(duì)象是個(gè)殘疾皇子狮斗,可洞房花燭夜當(dāng)晚...