主成分分析簡介
在多元統(tǒng)計(jì)分析中凶赁,主成分分析(英語:Principal components analysis魁淳,PCA)是一種分析盆驹、簡化數(shù)據(jù)集的技術(shù)。主成分分析經(jīng)常用于減少數(shù)據(jù)集的維數(shù)聊记,同時(shí)保持?jǐn)?shù)據(jù)集中的對(duì)方差貢獻(xiàn)最大的特征撒妈。這是通過保留低階主成分,忽略高階主成分做到的排监。這樣低階成分往往能夠保留住數(shù)據(jù)的最重要方面狰右。
計(jì)算步驟
輸入:n維樣本集D=(x(1),x(2),...,x(m)),要降維到的維數(shù)n.
輸出:降維后的樣本集D′ 1) 對(duì)所有的樣本進(jìn)行中心化: 2) 計(jì)算樣本的協(xié)方差矩陣X*XT
3) 對(duì)矩陣X*XT進(jìn)行特征值分解
4)取出最大的n個(gè)特征值(從大到小排序)對(duì)應(yīng)的特征向量(w1,w2,...,wn), 將所有的特征向量標(biāo)準(zhǔn)化后舆床,組成特征向量矩陣W棋蚌。
5)對(duì)樣本集中的每一個(gè)樣本x(i),轉(zhuǎn)化為新的樣本z(i)=WT*x(i) 6) 得到輸出樣本集D′=(z(1),z(2),...,z(m)) 即前m成分
博客園有一篇博客詳細(xì)介紹了PCA 原理
代碼實(shí)現(xiàn)
主要用到了gdal和Eigen庫
gdal用于讀寫遙感多光譜影像
Eigen則便于各種矩陣運(yùn)算
#include"gdal_priv.h"
#include"cpl_conv.h" // for CPLMalloc()
#include<iostream>
#include<fstream>
#include<string>
#include"Eigen/Dense"
using namespace std;
using namespace Eigen;
//eigen實(shí)現(xiàn)主成分分析
void featurenormalize(MatrixXd &X)
{
//計(jì)算每一維度均值
MatrixXd meanval = X.colwise().mean();
RowVectorXd meanvecRow = meanval;
//樣本均值化為0
X.rowwise() -= meanvecRow;
}
void computeCov(MatrixXd &X, MatrixXd &C)
{
//計(jì)算協(xié)方差矩陣C = XTX / n-1;
C = X.adjoint() * X;
C = C.array() / (X.rows() - 1);
}
void computeEig(MatrixXd &C, MatrixXd &vec, MatrixXd &val)
{
//計(jì)算特征值和特征向量嫁佳,使用selfadjont按照對(duì)陣矩陣的算法去計(jì)算,可以讓產(chǎn)生的vec和val按照有序排列(默認(rèn)從大到泄饶骸)
SelfAdjointEigenSolver<MatrixXd> eig(C);
vec = eig.eigenvectors();
val = eig.eigenvalues();
}
int computeDim(MatrixXd &val)
{
//輸出信息量達(dá)到95%的前n主成分
/*int dim;
double sum = 0;
for (int i = val.rows() - 1; i >= 0; --i)
{
sum += val(i, 0);
dim = i;
if (sum / val.sum() >= 0.95)
break;
}
return val.rows() - dim;*/
return 7;//這里設(shè)置輸出7個(gè)主成分
}
void writePcaImg(const char* path, int width, int height, double *pBuff, double *adfGeo, const char *prj, int bandNum, int imageSize, int pcaInd)
{
GDALDriver *pDriver = GetGDALDriverManager()->GetDriverByName("GTiff"); //圖像驅(qū)動(dòng)
char** ppszOptions = NULL;
int depth = 8;//圖像位深
int dim = 1;//每個(gè)圖像波段數(shù)蒿往,這里將每個(gè)主成分存儲(chǔ)到一個(gè)單波段圖像
GDALDataset* dst = pDriver->Create(path, width, height, dim, GDT_Float64, ppszOptions);//創(chuàng)建圖像
if (dst == nullptr)
printf("Can't Write Image!");
dst->SetGeoTransform(adfGeo);//設(shè)置坐標(biāo)
dst->SetProjection(prj);//設(shè)置投影
dst->RasterIO(GF_Write, 0, 0, width, height, &pBuff[(bandNum - pcaInd)*imageSize], width, height,
GDT_Float64, dim, nullptr, dim*depth, width*dim*depth, depth);//寫入圖像
GDALClose(dst);
}
int main(int argc, char *argv[])
{ //讀取影像
char* pszFilename = "D:/gdalData/pca/before.img";
char *outPath = "D:/pca_temp/pca";
GDALDataset *poDataset;
GDALAllRegister();
poDataset = (GDALDataset *)GDALOpen(pszFilename, GA_ReadOnly);
if (poDataset == NULL)
{
printf_s("read failed!\n");
}
else
{
printf_s("read successful!\n");
}
double adfGeoTransform[6];
if (poDataset->GetGeoTransform(adfGeoTransform) == CE_Failure)//讀取坐標(biāo)信息
{
printf("獲取參數(shù)失敗");
}
const char *prj = poDataset->GetProjectionRef();//讀取投影信息
int iWidth = poDataset->GetRasterXSize();//圖像寬度
int iHeight = poDataset->GetRasterYSize();//圖像高度
int iBandCount = poDataset->GetRasterCount();//波段數(shù)
int iImageSize = iWidth * iHeight;//圖像像元數(shù)
double *pBuff1 = new double[iImageSize*iBandCount];//開辟空間存儲(chǔ)原始圖像
poDataset->RasterIO(GF_Read, 0, 0, iWidth, iHeight, pBuff1,
iWidth, iHeight, GDT_Float64, iBandCount, 0, 0, 0, 0);//讀取原始圖像
MatrixXd staMat = Map<MatrixXd>(pBuff1, iImageSize, iBandCount);//將圖像讀入eigen矩陣
MatrixXd X(iImageSize, iBandCount), C(iBandCount, iBandCount);//按波段存儲(chǔ)至X矩陣,構(gòu)建協(xié)方差矩陣C
MatrixXd vec, val;//構(gòu)建特征向量湿弦、特征值矩陣vec瓤漏、val
X = MatrixXd(staMat);
//零均值化
featurenormalize(X);
//計(jì)算協(xié)方差
computeCov(X, C);
//計(jì)算特征值和特征向量
computeEig(C, vec, val);
//計(jì)算損失率,確定降低維數(shù)
int dim = computeDim(val);
//計(jì)算結(jié)果
MatrixXd res = X * vec.rightCols(dim);
//將主成分分量存儲(chǔ)至pBuff2
double *pBuff2 = new double[iImageSize*iBandCount];
for (int i = 0; i < dim; ++i)
{
for (int j = 0; j < iImageSize; ++j)
{
pBuff2[i*iImageSize + j] = res(j, i);
}
}
//各個(gè)主成分寫入圖像(包含坐標(biāo)及投影信息)
for (int i = 0; i < iBandCount; i++)
{
char x[]=" ";
strcpy(x, outPath);
char dstPath[10] = {};
sprintf(dstPath, "%d.tif", i + 1);
strcat(x, dstPath);
writePcaImg(x, iWidth, iHeight, pBuff2, adfGeoTransform, prj, 7, iImageSize, i + 1);
cout << "pca " << i + 1 << " complete" << endl;
}
cout << "pca complete!" << endl;
cin.get();
return 0;
}