[Machine Learning From Scratch]-unsupervised_learning-gaussian_mixture_model

from __future__ import division, print_function
import math
from sklearn import datasets
import numpy as np

from mlfromscratch.utils import normalize, euclidean_distance, calculate_covariance_matrix
from mlfromscratch.utils import Plot


class GaussianMixtureModel():
    """A probabilistic clustering method for determining groupings among data samples.
    Parameters:
    -----------
    k: int
        The number of clusters the algorithm will form.
    max_iterations: int
        The number of iterations the algorithm will run for if it does
        not converge before that. 
    tolerance: float
        If the difference of the results from one iteration to the next is
        smaller than this value we will say that the algorithm has converged.
    """
    def __init__(self, k=2, max_iterations=2000, tolerance=1e-8):
        self.k = k
        self.parameters = []
        self.max_iterations = max_iterations
        self.tolerance = tolerance
        self.responsibilities = []
        self.sample_assignments = None
        self.responsibility = None

    def _init_random_gaussians(self, X):
        """ Initialize gaussian randomly """
        n_samples = np.shape(X)[0]
        self.priors = (1 / self.k) * np.ones(self.k)
        for i in range(self.k):
            params = {}
            params["mean"] = X[np.random.choice(range(n_samples))]
            params["cov"] = calculate_covariance_matrix(X)
            self.parameters.append(params)

    def multivariate_gaussian(self, X, params):
        """ Likelihood """
        n_features = np.shape(X)[1]
        mean = params["mean"]
        covar = params["cov"]
        determinant = np.linalg.det(covar)
        likelihoods = np.zeros(np.shape(X)[0])
        for i, sample in enumerate(X):
            d = n_features  # dimension
            coeff = (1.0 / (math.pow((2.0 * math.pi), d / 2)
                            * math.sqrt(determinant)))
            exponent = math.exp(-0.5 * (sample - mean).T.dot(np.linalg.pinv(covar)).dot((sample - mean)))
            likelihoods[i] = coeff * exponent

        return likelihoods

    def _get_likelihoods(self, X):
        """ Calculate the likelihood over all samples """
        n_samples = np.shape(X)[0]
        likelihoods = np.zeros((n_samples, self.k))
        for i in range(self.k):
            likelihoods[
                :, i] = self.multivariate_gaussian(
                X, self.parameters[i])
        return likelihoods

    def _expectation(self, X):
        """ Calculate the responsibility """
        # Calculate probabilities of X belonging to the different clusters
        weighted_likelihoods = self._get_likelihoods(X) * self.priors
        sum_likelihoods = np.expand_dims(
            np.sum(weighted_likelihoods, axis=1), axis=1)
        # Determine responsibility as P(X|y)*P(y)/P(X)
        self.responsibility = weighted_likelihoods / sum_likelihoods
        # Assign samples to cluster that has largest probability
        self.sample_assignments = self.responsibility.argmax(axis=1)
        # Save value for convergence check
        self.responsibilities.append(np.max(self.responsibility, axis=1))

    def _maximization(self, X):
        """ Update the parameters and priors """
        # Iterate through clusters and recalculate mean and covariance
        for i in range(self.k):
            resp = np.expand_dims(self.responsibility[:, i], axis=1)
            mean = (resp * X).sum(axis=0) / resp.sum()
            covariance = (X - mean).T.dot((X - mean) * resp) / resp.sum()
            self.parameters[i]["mean"], self.parameters[
                i]["cov"] = mean, covariance

        # Update weights
        n_samples = np.shape(X)[0]
        self.priors = self.responsibility.sum(axis=0) / n_samples

    def _converged(self, X):
        """ Covergence if || likehood - last_likelihood || < tolerance """
        if len(self.responsibilities) < 2:
            return False
        diff = np.linalg.norm(
            self.responsibilities[-1] - self.responsibilities[-2])
        # print ("Likelihood update: %s (tol: %s)" % (diff, self.tolerance))
        return diff <= self.tolerance

    def predict(self, X):
        """ Run GMM and return the cluster indices """
        # Initialize the gaussians randomly
        self._init_random_gaussians(X)

        # Run EM until convergence or for max iterations
        for _ in range(self.max_iterations):
            self._expectation(X)    # E-step
            self._maximization(X)   # M-step

            # Check convergence
            if self._converged(X):
                break

        # Make new assignments and return them
        self._expectation(X)
        return self.sample_assignments
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
  • 序言:七十年代末,一起剝皮案震驚了整個(gè)濱河市避诽,隨后出現(xiàn)的幾起案子宪拥,更是在濱河造成了極大的恐慌菊碟,老刑警劉巖戈轿,帶你破解...
    沈念sama閱讀 222,865評(píng)論 6 518
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件筛圆,死亡現(xiàn)場(chǎng)離奇詭異杖爽,居然都是意外死亡,警方通過查閱死者的電腦和手機(jī)空免,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 95,296評(píng)論 3 399
  • 文/潘曉璐 我一進(jìn)店門空另,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人蹋砚,你說我怎么就攤上這事扼菠。” “怎么了坝咐?”我有些...
    開封第一講書人閱讀 169,631評(píng)論 0 364
  • 文/不壞的土叔 我叫張陵循榆,是天一觀的道長(zhǎng)。 經(jīng)常有香客問我墨坚,道長(zhǎng)秧饮,這世上最難降的妖魔是什么? 我笑而不...
    開封第一講書人閱讀 60,199評(píng)論 1 300
  • 正文 為了忘掉前任泽篮,我火速辦了婚禮盗尸,結(jié)果婚禮上,老公的妹妹穿的比我還像新娘帽撑。我一直安慰自己泼各,他們只是感情好,可當(dāng)我...
    茶點(diǎn)故事閱讀 69,196評(píng)論 6 398
  • 文/花漫 我一把揭開白布亏拉。 她就那樣靜靜地躺著扣蜻,像睡著了一般。 火紅的嫁衣襯著肌膚如雪及塘。 梳的紋絲不亂的頭發(fā)上莽使,一...
    開封第一講書人閱讀 52,793評(píng)論 1 314
  • 那天,我揣著相機(jī)與錄音笙僚,去河邊找鬼芳肌。 笑死,一個(gè)胖子當(dāng)著我的面吹牛味咳,可吹牛的內(nèi)容都是我干的庇勃。 我是一名探鬼主播,決...
    沈念sama閱讀 41,221評(píng)論 3 423
  • 文/蒼蘭香墨 我猛地睜開眼槽驶,長(zhǎng)吁一口氣:“原來是場(chǎng)噩夢(mèng)啊……” “哼!你這毒婦竟也來了鸳兽?” 一聲冷哼從身側(cè)響起掂铐,我...
    開封第一講書人閱讀 40,174評(píng)論 0 277
  • 序言:老撾萬榮一對(duì)情侶失蹤,失蹤者是張志新(化名)和其女友劉穎,沒想到半個(gè)月后全陨,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體爆班,經(jīng)...
    沈念sama閱讀 46,699評(píng)論 1 320
  • 正文 獨(dú)居荒郊野嶺守林人離奇死亡,尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點(diǎn)故事閱讀 38,770評(píng)論 3 343
  • 正文 我和宋清朗相戀三年辱姨,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了柿菩。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
    茶點(diǎn)故事閱讀 40,918評(píng)論 1 353
  • 序言:一個(gè)原本活蹦亂跳的男人離奇死亡雨涛,死狀恐怖枢舶,靈堂內(nèi)的尸體忽然破棺而出,到底是詐尸還是另有隱情替久,我是刑警寧澤凉泄,帶...
    沈念sama閱讀 36,573評(píng)論 5 351
  • 正文 年R本政府宣布,位于F島的核電站蚯根,受9級(jí)特大地震影響后众,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜颅拦,卻給世界環(huán)境...
    茶點(diǎn)故事閱讀 42,255評(píng)論 3 336
  • 文/蒙蒙 一蒂誉、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧距帅,春花似錦右锨、人聲如沸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 32,749評(píng)論 0 25
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至哮肚,卻和暖如春登夫,著一層夾襖步出監(jiān)牢的瞬間,已是汗流浹背允趟。 一陣腳步聲響...
    開封第一講書人閱讀 33,862評(píng)論 1 274
  • 我被黑心中介騙來泰國(guó)打工恼策, 沒想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留,地道東北人潮剪。 一個(gè)月前我還...
    沈念sama閱讀 49,364評(píng)論 3 379
  • 正文 我出身青樓涣楷,卻偏偏與公主長(zhǎng)得像,于是被迫代替她去往敵國(guó)和親抗碰。 傳聞我的和親對(duì)象是個(gè)殘疾皇子狮斗,可洞房花燭夜當(dāng)晚...
    茶點(diǎn)故事閱讀 45,926評(píng)論 2 361

推薦閱讀更多精彩內(nèi)容