[Machine Learning From Scratch]-supervised_learning-adaboost

from __future__ import division, print_function
import numpy as np
import math
from sklearn import datasets
import matplotlib.pyplot as plt
import pandas as pd

# Import helper functions
from mlfromscratch.utils import train_test_split, accuracy_score, Plot

# Decision stump used as the weak classifier in this implementation of AdaBoost
class DecisionStump():
    def __init__(self):
        # Determines if sample shall be classified as -1 or 1 given threshold
        self.polarity = 1
        # The index of the feature used to make classification
        self.feature_index = None
        # The threshold value that the feature should be measured against
        self.threshold = None
        # Value indicative of the classifier's accuracy
        self.alpha = None
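        # Together, polarity, feature_index and threshold define the prediction rule:
        # a sample is labeled -1 if polarity * x[feature_index] < polarity * threshold,
        # and 1 otherwise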

class Adaboost():
    """Boosting method that uses a number of weak classifiers in 
    ensemble to make a strong classifier. This implementation uses decision
    stumps, which is a one level Decision Tree. 
    Parameters:
    -----------
    n_clf: int
        The number of weak classifiers that will be used. 
    """
    def __init__(self, n_clf=5):
        self.n_clf = n_clf

    def fit(self, X, y):
        n_samples, n_features = np.shape(X)

        # Initialize weights to 1/N
        w = np.full(n_samples, (1 / n_samples))
        
        self.clfs = []
        # Iterate through classifiers
        for _ in range(self.n_clf):
            clf = DecisionStump()
            # Minimum weighted error found when using a certain feature
            # value as the threshold for predicting the sample labels
            min_error = float('inf')
            # Iterate through every unique feature value and see what value
            # makes the best threshold for predicting y
            for feature_i in range(n_features):
                feature_values = np.expand_dims(X[:, feature_i], axis=1)
                unique_values = np.unique(feature_values)
                # Try every unique feature value as threshold
                for threshold in unique_values:
                    p = 1
                    # Set all predictions to '1' initially
                    prediction = np.ones(np.shape(y))
                    # Label the samples whose values are below threshold as '-1'
                    prediction[X[:, feature_i] < threshold] = -1
                    # Error = sum of weights of misclassified samples
                    error = sum(w[y != prediction])
                    
                    # If the error is over 50% we flip the polarity so that samples that
                    # were classified as -1 are classified as 1, and vice versa
                    # E.g. error = 0.8 => (1 - error) = 0.2
                    if error > 0.5:
                        error = 1 - error
                        p = -1

                    # If this threshold resulted in the smallest error we save the
                    # configuration
                    if error < min_error:
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_index = feature_i
                        min_error = error
            # Calculate alpha, which is used to update the sample weights.
            # Alpha also indicates the classifier's proficiency (larger alpha => lower error)
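            # Standard AdaBoost weighting: alpha = 0.5 * ln((1 - error) / error);
            # the 1e-10 term guards against division by zero when min_error is 0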
            clf.alpha = 0.5 * math.log((1.0 - min_error) / (min_error + 1e-10))
            # Set all predictions to '1' initially
            predictions = np.ones(np.shape(y))
            # The indexes where the stump predicts '-1' (which side of the
            # threshold this is depends on the polarity)
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            # Label those as '-1'
            predictions[negative_idx] = -1
            # Calculate new weights:
            # misclassified samples get larger weights and correctly classified samples smaller ones
            w *= np.exp(-clf.alpha * y * predictions)
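            # (for misclassified samples y * h(x) = -1, so the factor is e^alpha > 1;
            #  for correctly classified samples it is e^(-alpha) < 1)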
            # Normalize to one
            w /= np.sum(w)

            # Save classifier
            self.clfs.append(clf)

    def predict(self, X):
        n_samples = np.shape(X)[0]
        y_pred = np.zeros((n_samples, 1))
        # For each classifier => label the samples
        for clf in self.clfs:
            # Set all predictions to '1' initially
            predictions = np.ones(np.shape(y_pred))
            # The indexes where the stump predicts '-1' (which side of the
            # threshold this is depends on the polarity)
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            # Label those as '-1'
            predictions[negative_idx] = -1
            # Add predictions weighted by the classifier's alpha
            # (alpha is indicative of the classifier's proficiency)
            y_pred += clf.alpha * predictions

        # Return sign of prediction sum
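        # Final ensemble decision: H(x) = sign(sum_t alpha_t * h_t(x))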
        y_pred = np.sign(y_pred).flatten()

        return y_pred


def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)


if __name__ == "__main__":
    main()
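
# Note: train_test_split, accuracy_score and Plot above come from the author's
# mlfromscratch utils. If that package is unavailable, scikit-learn equivalents
# can be swapped in for everything except the plotting helper
# (a sketch under that assumption, not part of the original implementation):
#
#   from sklearn.model_selection import train_test_split
#   from sklearn.metrics import accuracy_score
#   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
#   accuracy = accuracy_score(y_test, y_pred)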