from __future__ import division, print_function
import numpy as np
import math
from sklearn import datasets
import matplotlib.pyplot as plt
import pandas as pd
# Import helper functions
from mlfromscratch.utils import train_test_split, accuracy_score, Plot
# Decision stump used as weak classifier in this impl. of Adaboost
class DecisionStump():
    """Container for the parameters of a one-level decision tree.

    The stump holds no logic of its own: it only stores which feature is
    compared against which threshold, the polarity of the comparison and
    the weight (alpha) of the stump. All attributes except ``polarity``
    start as ``None`` and are expected to be filled in by the code that
    trains the stump.
    """
    def __init__(self):
        # Determines if sample shall be classified as -1 or 1 given threshold
        self.polarity = 1
        # The index of the feature used to make classification
        self.feature_index = None
        # The threshold value that the feature should be measured against
        self.threshold = None
        # Value indicative of the classifier's accuracy
        self.alpha = None
class Adaboost():
    """Boosting method that uses a number of weak classifiers in
    ensemble to make a strong classifier. This implementation uses decision
    stumps, which is a one level Decision Tree.

    Labels are expected to be encoded as -1 and 1.

    Parameters:
    -----------
    n_clf: int
        The number of weak classifiers that will be used.
    """
    def __init__(self, n_clf=5):
        self.n_clf = n_clf

    @staticmethod
    def _stump_predict(clf, X):
        """Return the -1/1 predictions of a single stump for the samples in X.

        With polarity 1, samples whose feature value is below the threshold
        are labelled -1 and all others 1. With polarity -1 every label is
        negated. This is exactly the rule whose weighted error is measured
        during the threshold search in fit(), so the stump that is applied
        is the stump that was scored (including samples that are equal to
        the threshold).
        """
        predictions = np.ones(np.shape(X)[0])
        predictions[X[:, clf.feature_index] < clf.threshold] = -1
        if clf.polarity == -1:
            predictions = -predictions
        return predictions

    def fit(self, X, y):
        """Train n_clf decision stumps on X (n_samples, n_features) and y.

        Each round picks the feature/threshold/polarity combination with the
        smallest weighted error, derives the stump weight alpha from that
        error and re-weights the samples so that misclassified samples count
        more in the next round. The trained stumps are stored in self.clfs.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = np.shape(X)

        # Initialize weights to 1/N
        w = np.full(n_samples, (1.0 / n_samples))

        self.clfs = []
        # Iterate through classifiers
        for _ in range(self.n_clf):
            clf = DecisionStump()
            # Minimum error given for using a certain feature value threshold
            # for predicting sample label
            min_error = float('inf')
            # Iterate through every unique feature value and see what value
            # makes the best threshold for predicting y
            for feature_i in range(n_features):
                feature_column = X[:, feature_i]
                # Try every unique feature value as threshold
                for threshold in np.unique(feature_column):
                    p = 1
                    # Set all predictions to '1' initially
                    prediction = np.ones(np.shape(y))
                    # Label the samples whose values are below threshold as '-1'
                    prediction[feature_column < threshold] = -1
                    # Error = sum of weights of misclassified samples
                    error = np.sum(w[y != prediction])

                    # If the error is over 50% we flip the polarity so that
                    # samples that were classified as -1 are classified as 1,
                    # and vice versa. E.g error = 0.8 => (1 - error) = 0.2
                    if error > 0.5:
                        error = 1 - error
                        p = -1

                    # If this threshold resulted in the smallest error we save
                    # the configuration
                    if error < min_error:
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_index = feature_i
                        min_error = error

            # Calculate the alpha which is used to update the sample weights,
            # Alpha is also an approximation of this classifier's proficiency.
            # The small constant avoids a division by zero for a perfect stump.
            clf.alpha = 0.5 * math.log((1.0 - min_error) / (min_error + 1e-10))

            # Predictions of the selected stump on the training data
            predictions = self._stump_predict(clf, X)

            # Calculate new weights
            # Misclassified samples get larger weights and correctly
            # classified samples smaller
            w *= np.exp(-clf.alpha * y * predictions)
            # Normalize to one
            w /= np.sum(w)

            # Save classifier
            self.clfs.append(clf)

    def predict(self, X):
        """Return the sign of the alpha-weighted vote of all stumps.

        The result is a flat float array with one entry per sample. An entry
        is 0 when the weighted votes cancel out exactly (np.sign of zero).
        """
        X = np.asarray(X)
        n_samples = np.shape(X)[0]
        y_pred = np.zeros(n_samples)
        # For each classifier => label the samples
        for clf in self.clfs:
            # Add predictions weighted by the classifiers alpha
            # (alpha indicative of classifier's proficiency)
            y_pred += clf.alpha * self._stump_predict(clf, X)

        # Return sign of prediction sum
        return np.sign(y_pred)
def main():
    """Demo: train Adaboost to tell two digits of the sklearn digits
    dataset apart, print the test accuracy and plot the predictions."""
    data = datasets.load_digits()
    y = data.target

    digit1 = 1
    digit2 = 8
    # Keep only the samples that show one of the two digits
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])

    # Change labels to {-1, 1}: digit1 -> -1, digit2 -> 1. Building the label
    # array in a single pass avoids relabelling already relabelled entries.
    y = np.where(data.target[idx] == digit1, -1, 1)
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
# Run the demo only when the file is executed as a script, not on import
if __name__ == "__main__":
    main()
[Machine Learning From Scratch]-supervised_learning-adaboost
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘曉璐 我一進(jìn)店門(mén)起趾,熙熙樓的掌柜王于貴愁眉苦臉地迎上來(lái)诗舰,“玉大人,你說(shuō)我怎么就攤上這事阳掐∈夹疲” “怎么了冷蚂?”我有些...
- 文/不壞的土叔 我叫張陵,是天一觀的道長(zhǎng)汛闸。 經(jīng)常有香客問(wèn)我蝙茶,道長(zhǎng),這世上最難降的妖魔是什么诸老? 我笑而不...
- 正文 為了忘掉前任隆夯,我火速辦了婚禮,結(jié)果婚禮上别伏,老公的妹妹穿的比我還像新娘蹄衷。我一直安慰自己,他們只是感情好厘肮,可當(dāng)我...
- 文/花漫 我一把揭開(kāi)白布愧口。 她就那樣靜靜地躺著,像睡著了一般类茂。 火紅的嫁衣襯著肌膚如雪耍属。 梳的紋絲不亂的頭發(fā)上,一...
- 那天巩检,我揣著相機(jī)與錄音厚骗,去河邊找鬼。 笑死兢哭,一個(gè)胖子當(dāng)著我的面吹牛领舰,可吹牛的內(nèi)容都是我干的。 我是一名探鬼主播迟螺,決...
- 文/蒼蘭香墨 我猛地睜開(kāi)眼冲秽,長(zhǎng)吁一口氣:“原來(lái)是場(chǎng)噩夢(mèng)啊……” “哼!你這毒婦竟也來(lái)了煮仇?” 一聲冷哼從身側(cè)響起劳跃,我...
- 序言:老撾萬(wàn)榮一對(duì)情侶失蹤,失蹤者是張志新(化名)和其女友劉穎浙垫,沒(méi)想到半個(gè)月后刨仑,有當(dāng)?shù)厝嗽跇?shù)林里發(fā)現(xiàn)了一具尸體,經(jīng)...
- 正文 獨(dú)居荒郊野嶺守林人離奇死亡夹姥,尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
- 正文 我和宋清朗相戀三年杉武,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片辙售。...
- 正文 年R本政府宣布,位于F島的核電站容燕,受9級(jí)特大地震影響梁呈,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜蘸秘,卻給世界環(huán)境...
- 文/蒙蒙 一官卡、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧醋虏,春花似錦寻咒、人聲如沸。這莊子的主人今日做“春日...
- 文/蒼蘭香墨 我抬頭看了看天上的太陽(yáng)。三九已至粘舟,卻和暖如春熔脂,著一層夾襖步出監(jiān)牢的瞬間,已是汗流浹背柑肴。 一陣腳步聲響...
- 正文 我出身青樓,卻偏偏與公主長(zhǎng)得像绊序,于是被迫代替她去往敵國(guó)和親硕舆。 傳聞我的和親對(duì)象是個(gè)殘疾皇子,可洞房花燭夜當(dāng)晚...