library(e1071)
library(ggplot2)
# Load the thyroid data set shipped with the mclust package and take a first
# look at the distribution of the response variable.
data("thyroid", package = "mclust")
plot(thyroid$Diagnosis)
data <- thyroid

# Fix the RNG seed so the train/test split is reproducible.
set.seed(2016)
N <- nrow(thyroid)

# Draw 150 row indices from 1..N without replacement to form the training set.
train <- sample(seq_len(N), 150, replace = FALSE)
head(train)

# Fit a naive Bayes classifier predicting Diagnosis from all other columns,
# using only the training rows.
fit <- naiveBayes(Diagnosis ~ ., data = data[train, ])

# Inspect the components of the fitted object.
attributes(fit)
# $names
# [1] "apriori"   "tables"    "levels"    "isnumeric"
# [5] "call"
# $class
# [1] "naiveBayes"

# The `apriori` component holds the class distribution of the response.
fit$apriori

# Per-class conditional table for the predictor RT3U. The element is spelled
# out in full (`tables`) rather than relying on `$` partial matching.
fit$tables$RT3U
# Console output recorded from an interactive session:
# > fit$table$RT3U
#         RT3U
# Y            [,1]      [,2]
#   Hypo   121.2632 10.943502
#   Normal 111.3585  7.950069
#   Hyper   93.5200 19.977320
# The two columns are the per-class mean and standard deviation, respectively.
# Held-out predictors: every row not used for training, with the first column
# dropped (assumes Diagnosis is column 1 of the data frame -- TODO confirm).
test_x <- data[-train, -1]

# Predicted class label for each held-out row.
pred <- predict(fit, test_x, type = "class")
head(pred, 4)

# Posterior class probabilities for the same rows. These are stored under a
# separate name so the class labels in `pred` are not overwritten before the
# confusion matrix is built.
pred_prob <- predict(fit, test_x, type = "raw")

# Confusion matrix: predicted class (rows) vs. actual Diagnosis (columns).
table(pred, data$Diagnosis[-train])
# Output recorded from an earlier run:
# pred     Hypo Normal Hyper
#   Hypo     11      1     0
#   Normal    0     43     0
#   Hyper     0      0    10
# The confusion matrix shows that the classifier learned well.