Applications of Clustering
Last week, while browsing the web, I happened to learn that the final step of radar signal processing is plot (point-trace) extraction, and that it uses a clustering algorithm.
So I searched for clustering algorithms, and there are a lot of them! Among them I spotted k-means, which I remembered from the part of my earlier machine-learning course that also covered KNN classification (hence the KNN file and function names below). In Python it only takes a few lines, so I decided to practice by writing it in C++ and pretend I am developing a radar. There is no code optimization here; it only implements the basic functionality.
The Idea Behind K-means
The core idea is simple: a point belongs to whichever cluster it is closest to, and that rule is used both for grouping the data and for predicting new points.
- Initialize K clusters and pick an initial center point for each.
- For every point, compute its distance to each cluster center and assign the point to the nearest cluster.
- Update each cluster's center to the mean of the points assigned to it.
- Repeat the previous two steps until the cluster centers no longer move (a minimal sketch of this loop follows the list).
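Before diving into my own code, here is a minimal, self-contained sketch of that loop in C++, just so the steps above are easy to map onto code. Everything in it (the names Point, dist and kmeans, and the tiny hand-picked data) is purely illustrative and is not part of the real implementation further down.

/* Minimal k-means sketch for 2-D points; illustrative only. */
#include <cmath>
#include <cstdio>
#include <vector>

struct Point { float x, y; };

static float dist(const Point& a, const Point& b)
{
    return std::sqrt((a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y));
}

/* Assign every point to its nearest center, then move each center to the mean
   of its points; repeat until no center moves (or maxIter is reached). */
static std::vector<int> kmeans(const std::vector<Point>& pts, std::vector<Point>& centers, int maxIter = 100)
{
    std::vector<int> label(pts.size(), 0);
    for (int it = 0; it < maxIter; ++it)
    {
        /* assignment step */
        for (size_t p = 0; p < pts.size(); ++p)
        {
            size_t best = 0;
            for (size_t c = 1; c < centers.size(); ++c)
                if (dist(pts[p], centers[c]) < dist(pts[p], centers[best]))
                    best = c;
            label[p] = (int)best;
        }
        /* update step: each center becomes the mean of its members */
        bool moved = false;
        for (size_t c = 0; c < centers.size(); ++c)
        {
            Point sum = {0.0f, 0.0f};
            int n = 0;
            for (size_t p = 0; p < pts.size(); ++p)
                if (label[p] == (int)c) { sum.x += pts[p].x; sum.y += pts[p].y; ++n; }
            if (n == 0) continue;                        /* leave an empty cluster's center alone */
            Point mean = {sum.x / n, sum.y / n};
            if (dist(mean, centers[c]) > 0.001f) moved = true;
            centers[c] = mean;
        }
        if (!moved) break;                               /* centers stopped moving: done */
    }
    return label;
}

int main()
{
    std::vector<Point> pts = {{1, 1}, {2, 1}, {8, 9}, {9, 8}};
    std::vector<Point> centers = {{0, 0}, {10, 10}};     /* K = 2, hand-picked starting centers */
    std::vector<int> label = kmeans(pts, centers);
    for (size_t p = 0; p < pts.size(); ++p)
        std::printf("(%g,%g) -> cluster %d\n", pts[p].x, pts[p].y, label[p]);
    return 0;
}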
I finished the code yesterday and spent today testing and debugging it. I fixed a few small bugs and one big one: after the clustering step finished, one of the clusters ended up with zero points, even though each cluster should contain at least the point its center was taken from! Good old debugging eventually sorted it out.
Important Parameters
The initial centers are chosen at random. If the initial values happen to be chosen badly, the number of center updates needed before convergence also grows.
In the code below there are 24 input points, the number of clusters is set to 4, and there are 2 test points.
Feel free to change these configurable parameters to fit your own application; they are summarized right below.
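For reference, these are the spots in the listings below where those values live (collected here for convenience, nothing new):

#define OBJ_LEN 24   /* number of input points read from in.txt (KNN.hpp)           */
#define TEST_LEN 2   /* number of test points classified after clustering (KNN.hpp) */
Kmeans mytest(4);    /* number of clusters K, passed to the constructor (main.cpp)  */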
C++ Code
KNN.hpp
/* Author: AppleCai Date 2019/04/28 */
#pragma once
#include<vector>
#include<iostream>
#include<cmath>
using namespace std;
#define OBJ_LEN 24
#define TEST_LEN 2
class Kmeans
{
public:
typedef struct mypoint
{
float x;
float y;
int tag;
}st_mypoint;
enum my_enum
{
NO=0,
YES
};
Kmeans(int KgNum);
void dataInit(st_mypoint input[]);
void updateCenter(st_mypoint point[]); //set Kcenter;
float calDistance(st_mypoint p1,st_mypoint p2); //Euclidean distance
void Cluster(st_mypoint point[]); //set the Kgrouplist;
void KNN(st_mypoint input[]); //run the k-means loop until the centers converge
bool checkEnd(vector<st_mypoint> p1group,vector<st_mypoint> p2group);
void KNNtest(st_mypoint input[]); //assign each test point to its nearest center
private:
int KgroupNum; //the number of cluster
vector<st_mypoint> Kcenter; //the center of each cluster
vector<vector<int>> Kgrouplist; //the point to the related group
};
KNN.cpp
/* Author: AppleCai Date 2019/04/28 */
#include "KNN.hpp"
Kmeans::Kmeans(int KgNum)
{
KgroupNum = KgNum;
}
/* init the K group and select the group center */
void Kmeans::dataInit(st_mypoint input[])
{
int i,N;
for(i=0;i<KgroupNum;i++)
{
N = rand() % OBJ_LEN; /* note: rand() is never seeded, and the same point may be picked for two centers */
Kcenter.push_back(input[N]);
cout<<"Kcenter="<<Kcenter[i].x<<" "<<Kcenter[i].y<<endl;
}
for(i=0;i<KgroupNum;i++) /* create an empty list for each group up front, otherwise accessing Kgrouplist.at(i) later throws at run time */
{
vector<int> temp;
Kgrouplist.push_back(temp);
}
}
/*Calculate average value,update center*/
void Kmeans::updateCenter(st_mypoint point[])
{
int i=0;
int j=0;
for(i=0;i<KgroupNum;i++)
{
if(Kgrouplist.at(i).empty())
{
continue; /* guard: an empty group would cause a divide-by-zero below, so keep its old center */
}
st_mypoint sum={0,0};
for(j=0;j<Kgrouplist.at(i).size();j++)
{
point[Kgrouplist[i][j]].tag = i; /* add tag to each of the point */
// for debug
// cout<<"Kgrouplist[i][j]"<<point[Kgrouplist[i][j]].x<<point[Kgrouplist[i][j]].y<<"size"<<Kgrouplist.at(i).size()<<endl;
sum.x=(sum.x+point[(Kgrouplist[i][j])].x);
sum.y=(sum.y+point[(Kgrouplist[i][j])].y);
}
sum.x = sum.x/Kgrouplist.at(i).size();
sum.y = sum.y/Kgrouplist.at(i).size();
Kcenter[i]=sum;
Kcenter[i].tag = i; /* add tag for center */
cout<<"updateCenter=["<<i<<"]="<<sum.x<<" "<<sum.y<<" "<<Kcenter[i].tag<<endl;
}
}
/*Euclidean distance*/
float Kmeans::calDistance(st_mypoint p1,st_mypoint p2)
{
float dis=0;
dis=sqrt(pow((p1.x-p2.x),2)
+pow((p1.y-p2.y),2)
);
return dis;
}
/* Cluster */
void Kmeans::Cluster(st_mypoint point[])
{
float mindistance;
float curdistance;
int index=0;
int i,j;
//clear last iteration's assignment first, otherwise the group lists keep growing
for(i=0;i<KgroupNum;i++)
{
Kgrouplist.at(i).clear();
}
for(j=0;j<OBJ_LEN;j++)
{
/* start by assuming the first center is the nearest one */
mindistance= calDistance(Kcenter[0],point[j]);
index=0; /* index of the nearest group found so far */
for(i=1;i<KgroupNum;i++)
{
curdistance = calDistance(Kcenter[i],point[j]);
if(curdistance<mindistance)
{
mindistance = curdistance;
index=i;
}
}
/* add the point to the list of its nearest group */
Kgrouplist.at(index).push_back(j);
}
/* TODO: debug output, remove later */
// for(i=0;i<KgroupNum;i++)
// {
// cout<<"Size of grouplist="<<Kgrouplist[i].size()<<endl;
// for(j=0;j<Kgrouplist[i].size();j++)
// {
// cout<<"Kgrouplist["<<i<<"]["<<j<<"]="<<Kgrouplist[i][j]<<endl;
// }
// cout<<"next group:"<<endl;
// }
}
bool Kmeans::checkEnd(vector<st_mypoint> p1group,vector<st_mypoint> p2group)
{
bool ret = YES;
int i;
for(i=0;i<KgroupNum;i++)
{ /* stop only when no center has moved more than 0.001 in either coordinate */
if(fabs(p1group[i].x - p2group[i].x)>0.001||
fabs(p1group[i].y - p2group[i].y)>0.001
)
{
ret = NO;
}
}
return ret;
}
/* despite the name, this runs the k-means loop: cluster, update the centers, repeat until they converge */
void Kmeans::KNN(st_mypoint input[])
{
bool flag=NO;
int cnt=0;
int i;
dataInit(input);
vector<st_mypoint> preKcenter(KgroupNum);
for(i=0;i<KgroupNum;i++)
{
preKcenter[i].x=0;
preKcenter[i].y=0;
}
while((NO == flag)&&(cnt<100))
{
cnt++;
Cluster(input);
updateCenter(input);
flag = checkEnd(preKcenter,Kcenter);
copy(Kcenter.begin(),Kcenter.end(),preKcenter.begin());
}
cout<<"cnt="<<cnt<<endl;
for(i=0;i<KgroupNum;i++)
{
cout<<"Kcenter["<<i<<"]="<<Kcenter[i].x<<" "<<Kcenter[i].y<<endl;
}
}
void Kmeans::KNNtest(st_mypoint input[])
{
int i,j;
float mindistance,curdistance;
for(j=0;j<TEST_LEN;j++)
{
/* start by assuming center 0 is the nearest one */
mindistance= calDistance(Kcenter[0],input[j]);
input[j].tag = 0; /* so the initial tag is group 0 */
for(i=1;i<KgroupNum;i++)
{
curdistance = calDistance(Kcenter[i],input[j]);
if(curdistance<mindistance)
{
mindistance = curdistance;
input[j].tag = i; /* found a smaller distance, update the tag */
cout<<"input["<<j<<"].tag="<<i<<endl;
}
}
}
// for debug
// for(j=0;j<TEST_LEN;j++)
// {
// cout<<"The result of TestData["<<j<<"] is "<<input[j].tag<<endl;
// }
}
main.cpp
/* Author: AppleCai Date 2019/04/28 */
#include<iostream>
#include<fstream>
#include<vector>
#include"KNN.hpp"
using namespace std;
int main(void)
{
Kmeans::st_mypoint TrainData[OBJ_LEN];
Kmeans::st_mypoint TestData[TEST_LEN];
int i;
ifstream fin("in.txt");
ofstream fout("out.txt");
for(int i=0;i<OBJ_LEN;i++)
{
fin>>TrainData[i].x>>TrainData[i].y;
cout<<TrainData[i].x<<","<<TrainData[i].y<<endl;
}
fin.close();
Kmeans mytest(4); //KgroupNum set to 4
mytest.KNN(TrainData);
TestData[0].x = 6;
TestData[0].y = 6;
TestData[1].x = 0;
TestData[1].y = 1;
mytest.KNNtest(TestData);
for(i=0;i<TEST_LEN;i++)
{
fout<<"The result of TestData is:"<<TestData[i].x<<" "<<TestData[i].y<<" "<<TestData[i].tag<<endl;
}
return 0;
}
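One last note: the program expects an in.txt next to the executable with 24 lines, each holding an x and a y value. If you just want to smoke-test it without preparing a file, one option (my own suggestion, not part of the original program) is to replace the fin>> loop in main() with generated points, for example four rough blobs:

/* Hypothetical replacement for the fin>> reading loop in main():
   fills TrainData with 24 points around four arbitrary blob centers.
   Needs #include <cstdlib> at the top of main.cpp for rand(). */
float cx[4] = {0.0f, 10.0f, 0.0f, 10.0f};
float cy[4] = {0.0f, 0.0f, 10.0f, 10.0f};
for(int i=0;i<OBJ_LEN;i++)
{
    TrainData[i].x = cx[i % 4] + (rand() % 100) / 100.0f; /* jitter in [0,1) */
    TrainData[i].y = cy[i % 4] + (rand() % 100) / 100.0f;
    cout<<TrainData[i].x<<","<<TrainData[i].y<<endl;
}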