import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
data = pd.read_csv("creditcard.csv")
# Count how many samples fall into each class (0 = normal, 1 = fraud)
count_classes = data['Class'].value_counts(sort=True).sort_index()
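# Optional visual check, a sketch not in the original script: plotting count_classes
# as a bar chart makes the heavy class imbalance immediately visible.
count_classes.plot(kind='bar')
plt.title("Class distribution")
plt.xlabel("Class")
plt.ylabel("Frequency")
plt.show()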
from sklearn.preprocessing import StandardScaler
# StandardScaler applies z-score normalisation:
# it rescales the original values to zero mean and unit variance,
# and the result is stored in a new 'normAmount' column.
data['normAmount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))
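# Illustration only (an assumption about StandardScaler's behaviour, not part of the
# original pipeline): the same z-score can be computed by hand. StandardScaler divides
# by the population standard deviation (ddof=0), so the two results should match.
manual_z = (data['Amount'] - data['Amount'].mean()) / data['Amount'].std(ddof=0)
assert np.allclose(manual_z.values, data['normAmount'].values)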
# Drop the Time and Amount columns, which are no longer needed,
# to obtain a new data set.
data = data.drop(['Time', 'Amount'], axis=1)
# Split the data into features and labels
X = data.loc[:, data.columns != 'Class']
y = data.loc[:, data.columns == 'Class']
# Random undersampling
# Count the fraud samples (Class == 1) and record their indices
# Number of data points in the minority class
number_records_fraud = len(data[data.Class == 1])
# Record the indices of the fraud samples as a numpy array
fraud_indices = np.array(data[data.Class == 1].index)
# Picking the indices of the normal classes
# Record the indices of the normal samples (Class == 0)
normal_indices = data[data.Class == 0].index
# Out of the indices we picked, randomly select "x" number (number_records_fraud)
# From normal_indices, draw as many indices as there are fraud samples,
# so that the two classes end up balanced.
# np.random.choice(a, size, replace, p): draw `size` items from `a` with probabilities `p`;
# `replace` controls whether sampling is done with replacement.
random_normal_indices = np.random.choice(normal_indices, number_records_fraud, replace=False)
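# Tiny self-contained illustration (assumed example values, independent of the data):
# drawing 3 items from 0..9 without replacement yields 3 distinct values.
demo_draw = np.random.choice(np.arange(10), 3, replace=False)
assert len(set(demo_draw)) == 3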
# Convert the sampled indices to a numpy array
random_normal_indices = np.array(random_normal_indices)
# Appending the 2 indices
# fraud_indices: indices of the fraud samples; random_normal_indices: indices of the sampled normal samples.
# np.concatenate joins the two 1-D index arrays into one new index for the undersampled set.
under_sample_indices = np.concatenate([fraud_indices, random_normal_indices])
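# Tiny illustration (assumed values): for 1-D arrays np.concatenate simply joins them end to end.
assert list(np.concatenate([np.array([1, 2]), np.array([3, 4])])) == [1, 2, 3, 4]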
# Under sample dataset
# loc["a", "b"] selects by label: row label "a", column label "b";
# iloc[1, 1] selects by position: second row, second column.
# Take every column for the rows whose indices we just collected:
# this is the undersampled data set.
under_sample_data = data.iloc[under_sample_indices, :]
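# Tiny illustration of label vs. position indexing (hypothetical demo frame, not part of the data):
demo_df = pd.DataFrame({'v': [10, 20, 30]}, index=['a', 'b', 'c'])
assert demo_df.loc['b', 'v'] == demo_df.iloc[1, 0] == 20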
# Split the undersampled data into features and labels using the 'Class' column
X_undersample = under_sample_data.loc[:, under_sample_data.columns != 'Class']
y_undersample = under_sample_data.loc[:, under_sample_data.columns == 'Class']
# Showing ratio
# After undersampling, each class makes up roughly 50% of the data:
print("Percentage of normal transactions: ", len(under_sample_data[under_sample_data.Class == 0])/len(under_sample_data))
print("Percentage of fraud transactions: ", len(under_sample_data[under_sample_data.Class == 1])/len(under_sample_data))
print("Total number of transactions in resampled data: ", len(under_sample_data))
# Import the train/test splitting helper
from sklearn.model_selection import train_test_split
# Whole dataset
# train_test_split randomly divides the features X and the label column y into
# train and test sets; test_size is the fraction of samples held out for testing.
# Split the full dataset; the undersampled data below uses the same random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
print("Number transactions train dataset: ", len(X_train))
print("Number transactions test dataset: ", len(X_test))
print("Total number of transactions: ", len(X_train)+len(X_test))
# Undersampled dataset
# Split the balanced (undersampled) data into train and test sets in the same way:
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(
    X_undersample, y_undersample, test_size=0.3, random_state=0)
print("")
print("Number transactions train dataset: ", len(X_train_undersample))
print("Number transactions test dataset: ", len(X_test_undersample))
print("Total number of transactions: ", len(X_train_undersample)+len(X_test_undersample))