Above All
機器學習的大作業是寫圖像分類。這里我整理一些有用的參考資料，以便后來提交報告的時候邏輯比較清晰。
主要想用的特征還是SIFT和SURF，當然我覺得數據集給我的感覺是顏色直方圖也是可以用的。
一、簡單粗暴的提取SIFT特征
源碼:https://github.com/SimGuo/ImageProcessing/blob/master/main.cpp
二、Bag-of-words方法
作者:Savitch
出處:http://blog.csdn.net/assiduousknight/article/details/16901427
什么是BOW
Bag-of-words模型應用三步
接下來，我們通過上述圖像展示如何通過Bag-of-words模型，將圖像表示成數值向量。現在有三個目標類，分別是人臉、自行車和吉他。
-
Bag-of-words模型的第一步是利用
SIFT算法
，從每類圖像中提取視覺詞匯，將所有的視覺詞匯集合在一起，如下圖所示：
提取視覺詞匯 第二步是
利用K-Means算法構造單詞表
。K-Means算法是一種基于樣本間相似性度量的間接聚類方法，此算法以K為參數，把N個對象分為K個簇，以使簇內具有較高的相似度，而簇間相似度較低。SIFT提取的視覺詞匯向量之間根據距離的遠近，可以利用K-Means算法將詞義相近的詞匯合并，作為單詞表中的基礎詞匯，假定我們將K設為4，那么單詞表的構造過程如下圖所示：
- 第三步是利用單詞表中的詞匯表示圖像。利用SIFT算法，可以從每幅圖像中提取很多個特征點，這些特征點都可以用單詞表中的單詞近似代替，通過統計單詞表中每個單詞在圖像中出現的次數，可以將圖像表示成為一個K=4維數值向量。請看下圖：
代碼(還沒看懂)
- 配置環境
- 創建C++類CSIFTDiscriptor
為了方便使用，我們將SIFT庫用C++類CSIFTDiscriptor封裝，該類可以計算并獲取指定圖像的特征點向量集合。類的聲明在SIFTDiscriptor.h文件中，內容如下：
成員函數(shù)實(shí)現(xiàn)在SIFTDiscriptor.cpp文件中尼酿,其中爷狈,CalculateSIFT函數(shù)完成特征點(diǎn)的提取和計(jì)算,其主要內(nèi)部流程如下:#ifndef _SIFT_DISCRIPTOR_H_ #define _SIFT_DISCRIPTOR_H_ #include <string> #include <highgui.h> #include <cv.h> extern "C" { #include "../sift/sift.h" #include "../sift/imgfeatures.h" #include "../sift/utils.h" }; class CSIFTDiscriptor { public: int GetInterestPointNumber() { return m_nInterestPointNumber; } struct feature *GetFeatureArray() { return m_pFeatureArray; } public : void SetImgName(const std::string &strImgName) { m_strInputImgName = strImgName; } int CalculateSIFT(); public: CSIFTDiscriptor(const std::string &strImgName); CSIFTDiscriptor() { m_nInterestPointNumber = 0; m_pFeatureArray = NULL; } ~CSIFTDiscriptor(); private: std::string m_strInputImgName; int m_nInterestPointNumber; feature *m_pFeatureArray; }; #endif
- 調用OpenCV函數cvLoadImage加載輸入圖像；
- 為了統一輸入圖像的尺寸，CalculateSIFT函數的第二步是調整輸入圖像的尺寸，這通過調用cvResize函數實現；
- 如果輸入圖像是彩色圖像，我們需要首先將其轉化成灰度圖，這通過調用cvCvtColor函數實現；
- 調用SIFT庫函數sift_features獲取輸入圖像的特征點向量集合和特征點個數。
#include "SIFTDiscriptor.h"
int CSIFTDiscriptor::CalculateSIFT()
{
IplImage *pInputImg = cvLoadImage(m_strInputImgName.c_str());
if (!pInputImg)
{
return -1;
}
int nImgWidth = 320; //訓(xùn)練用標(biāo)準(zhǔn)圖像大小
double dbScaleFactor = pInputImg->width / 300.0; //縮放因子
IplImage *pTmpImg = cvCreateImage(cvSize(pInputImg->width / dbScaleFactor, pInputImg->height / dbScaleFactor),
pInputImg->depth, pInputImg->nChannels);
cvResize(pInputImg, pTmpImg); //縮放
cvReleaseImage(&pInputImg);
if (pTmpImg->nChannels != 1) //非灰度圖
{
IplImage *pGrayImg = cvCreateImage(cvSize(pTmpImg->width, pTmpImg->height),
pTmpImg->depth, 1);
cvCvtColor(pTmpImg, pGrayImg, CV_RGB2GRAY);
m_nInterestPointNumber = sift_features(pGrayImg, &m_pFeatureArray);
cvReleaseImage(&pGrayImg);
}
else
{
m_nInterestPointNumber = sift_features(pTmpImg, &m_pFeatureArray);
}
cvReleaseImage(&pTmpImg);
return m_nInterestPointNumber;
}
/**
 * Builds a descriptor for the given image file: records the name, starts with
 * an empty feature set and then runs CalculateSIFT() straight away.
 */
CSIFTDiscriptor::CSIFTDiscriptor(const std::string &strImgName)
    : m_strInputImgName(strImgName),
      m_nInterestPointNumber(0),
      m_pFeatureArray(NULL)
{
    CalculateSIFT();
}
/** Releases the feature array with free(), if one is held. */
CSIFTDiscriptor::~CSIFTDiscriptor()
{
    if (!m_pFeatureArray)
    {
        return; // nothing was ever computed
    }
    free(m_pFeatureArray);
}
```
- 創建C++類CImgSet，管理實驗圖像集合
Bag-of-words模型需要從多個目標(biāo)類圖像中提取視覺詞匯指孤,不同目標(biāo)類的圖像存儲在不同子文件夾中,為了方便操作贬堵,我們設(shè)計(jì)了一個專門的類CImgSet用來管理圖像集合恃轩,聲明在文件ImgSet.h中:#ifndef _IMG_SET_H_ #define _IMG_SET_H_ #include <vector> #include <string> #pragma comment(lib, "shlwapi.lib") class CImgSet { public: CImgSet (const std::string &strImgDirName) : m_strImgDirName(strImgDirName+"http://"), m_nImgNumber(0){} int GetTotalImageNumber() { return m_nImgNumber; } std::string GetImgName(int nIndex) { return m_szImgs.at(nIndex); } int LoadImgsFromDir() { return LoadImgsFromDir(""); } private: int LoadImgsFromDir(const std::string &strDirName); private: typedef std::vector <std::string> IMG_SET; IMG_SET m_szImgs; int m_nImgNumber; const std::string m_strImgDirName; }; #endif //成員函數(shù)實(shí)現(xiàn)在文件ImgSet.cpp中: #include "ImgSet.h" #include <windows.h> #include <Shlwapi.h> /** strSubDirName:子文件夾名 */ int CImgSet::LoadImgsFromDir(const std::string &strSubDirName) { WIN32_FIND_DATAA stFD = {0}; std::string strDirName; if ("" == strSubDirName) { strDirName = m_strImgDirName; } else { strDirName = strSubDirName; } std::string strFindName = strDirName + "http://*"; HANDLE hFile = FindFirstFileA(strFindName.c_str(), &stFD); BOOL bExist = FindNextFileA(hFile, &stFD); for (;bExist;) { std::string strTmpName = strDirName + stFD.cFileName; if (strDirName + "." == strTmpName || strDirName + ".." == strTmpName) { bExist = FindNextFileA(hFile, &stFD); continue; } if (PathIsDirectoryA(strTmpName.c_str())) { strTmpName += "http://"; LoadImgsFromDir(strTmpName); bExist = FindNextFileA(hFile, &stFD); continue; } std::string strSubImg = strDirName + stFD.cFileName; m_szImgs.push_back(strSubImg); bExist = FindNextFileA(hFile, &stFD); } m_nImgNumber = m_szImgs.size(); return m_nImgNumber; }
LoadImgsFromDir遞歸地從圖像文件夾中獲取所有實驗用圖像名，包括子文件夾。該函數內部通過循環調用Windows API函數FindFirstFile和FindNextFile來找到文件夾中所有圖像的名稱。
- 創建CHistogram，生成圖像的直方圖表示
//ImgHistogram.h
#ifndef _IMG_HISTOGRAM_H_
#define _IMG_HISTOGRAM_H_
#include <string>
#include "SIFTDiscriptor.h"
#include "ImgSet.h"
// Number of K-Means clusters requested when the codebook is built; it is also
// the row length of CHistogram::m_pszHistogram (one bin per cluster).
const int cnClusterNumber = 1500;
// Alias for FEATURE_MAX_D, which this file uses as the descriptor length.
// NOTE(review): FEATURE_MAX_D presumably comes from the SIFT library headers
// included via SIFTDiscriptor.h — confirm.
const int ciMax_D = FEATURE_MAX_D;
/**
 * Builds Bag-of-words histograms: a codebook of cnClusterNumber visual words
 * plus one histogram row per training image.
 *
 * Ownership:
 *  - m_pszHistogram and m_pObservedData are always owned by this object.
 *  - m_pCodebook is owned only while m_bSet is false. A codebook handed in
 *    through SetCodebook() stays the caller's property.
 * Because of these raw owned resources the class is non-copyable.
 */
class CHistogram
{
public:
    /** Sets the root directory of the training image set used by FormHistogram(). */
    void SetTrainingImgSetName(const std::string &strTrainingImgSet)
    {
        m_strTrainingImgSetName = strTrainingImgSet;
    }

    /** Computes the training histograms (and the codebook if none is set). */
    int FormHistogram();

    /** Adds the histogram of one image into the caller-supplied pszImgHistogram. */
    CvMat CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[]);

    /** @return the training histograms as a matrix. */
    CvMat *GetObservedData();

    /** @return the current codebook, or 0 if there is none yet. */
    CvMat *GetCodebook()
    {
        return m_pCodebook;
    }

    /**
     * Installs an externally owned codebook; the caller keeps ownership and
     * must keep it alive while this object uses it. A codebook this object
     * built itself is released before being replaced. Passing the pointer
     * that is already installed changes nothing.
     */
    void SetCodebook(CvMat *pCodebook)
    {
        if (pCodebook == m_pCodebook)
        {
            return;
        }
        if (m_pCodebook && !m_bSet)
        {
            cvReleaseMat(&m_pCodebook); // release our own codebook instead of leaking it
        }
        m_pCodebook = pCodebook;
        // A null pointer is not an external codebook: whatever gets built later
        // belongs to this object again.
        m_bSet = (pCodebook != 0);
    }

public:
    CHistogram()
        : m_bSet(false), m_pCodebook(0), m_pObservedData(0), m_pszHistogram(0), m_nImgNumber(0) {}

    ~CHistogram()
    {
        // Allocated with new[] in FormHistogram(), so it needs delete[].
        delete[] m_pszHistogram;
        m_pszHistogram = 0;

        if (m_pObservedData)
        {
            cvReleaseMat(&m_pObservedData);
            m_pObservedData = 0;
        }
        if (m_pCodebook && !m_bSet)
        {
            cvReleaseMat(&m_pCodebook);
            m_pCodebook = 0;
        }
    }

private:
    // Declared but intentionally not defined: a copy would release the same
    // buffers twice.
    CHistogram(const CHistogram &);
    CHistogram &operator=(const CHistogram &);

private:
    bool m_bSet;                            // true when m_pCodebook was supplied through SetCodebook()
    CvMat *m_pCodebook;                     // cluster centres
    CvMat *m_pObservedData;                 // released in the destructor
    std::string m_strTrainingImgSetName;    // root directory of the training images
    int (*m_pszHistogram)[cnClusterNumber]; // one row of cnClusterNumber bins per training image
    int m_nImgNumber;                       // number of rows in m_pszHistogram
};
#endif
#include "ImgHistogram.h"
int CHistogram::FormHistogram()
{
int nRet = 0;
CImgSet iImgSet(m_strTrainingImgSetName);
nRet = iImgSet.LoadImgsFromDir();
const int cnTrainingImgNumber = iImgSet.GetTotalImageNumber();
m_nImgNumber = cnTrainingImgNumber;
CSIFTDiscriptor *pDiscriptor = new CSIFTDiscriptor[cnTrainingImgNumber];
int nIPNumber(0) ;
for (int i = 0; i < cnTrainingImgNumber; ++i) //計(jì)算每一幅訓(xùn)練圖像的SIFT描述符
{
const string strImgName = iImgSet.GetImgName(i);
pDiscriptor[i].SetImgName(strImgName);
pDiscriptor[i].CalculateSIFT();
nIPNumber += pDiscriptor[i].GetInterestPointNumber();
}
double (*pszDiscriptor)[FEATURE_MAX_D] = new double[nIPNumber][FEATURE_MAX_D]; //存儲所有描述符的數(shù)組蒸殿。每一行代表一個IP的描述符
ZeroMemory(pszDiscriptor, sizeof(int) * nIPNumber * FEATURE_MAX_D);
int nIndex = 0;
for (int i = 0; i < cnTrainingImgNumber; ++i) //遍歷所有圖像
{
struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();
int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();
for (int j = 0; j < nFeatureNumber; ++j) //遍歷一幅圖像中所有的IP(Interesting Point興趣點(diǎn)
{
for (int k = 0; k < FEATURE_MAX_D; k++)//初始化一個IP描述符
{
pszDiscriptor[nIndex][k] = pFeatureArray[j].descr[k];
}
++nIndex;
}
}
CvMat *pszLabels = cvCreateMat(nIPNumber, 1, CV_32SC1);
//對所有IP的描述符筷厘,執(zhí)行KMeans算法,找到cnClusterNumber個聚類中心宏所,存儲在pszClusterCenters中
if (!m_pCodebook) //構(gòu)造碼元表
{
CvMat szSamples,
*pszClusterCenters = cvCreateMat(cnClusterNumber, FEATURE_MAX_D, CV_32FC1);
cvInitMatHeader(&szSamples, nIPNumber, FEATURE_MAX_D, CV_32FC1, pszDiscriptor);
cvKMeans2(&szSamples, cnClusterNumber, pszLabels,
cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),
1, (CvRNG *)0, 0, pszClusterCenters); //
m_pCodebook = pszClusterCenters;
}
m_pszHistogram = new int[cnTrainingImgNumber][cnClusterNumber]; //存儲每幅圖像的直方圖表示酥艳,每一行對應(yīng)一幅圖像
ZeroMemory(m_pszHistogram, sizeof(int) * cnTrainingImgNumber * cnClusterNumber);
//計(jì)算每幅圖像的直方圖
nIndex = 0;
for (int i = 0; i < cnTrainingImgNumber; ++i)
{
struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();
int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();
// int nIndex = 0;
for (int j = 0; j < nFeatureNumber; ++j)
{
// CvMat szFeature;
// cvInitMatHeader(&szFeature, 1, FEATURE_MAX_D, CV_32FC1, pszDiscriptor[nIndex++]);
// double dbMinimum = 1.79769e308;
// int nCodebookIndex = 0;
// for (int k = 0; k < m_pCodebook->rows; ++k)//找到距離最小的碼元,用最小碼元代替原//來的詞匯
// {
// CvMat szCode = cvMat(1, m_pCodebook->cols, m_pCodebook->type);
// cvGetRow(m_pCodebook, &szCode, k);
// double dbDistance = cvNorm(&szFeature, &szCode, CV_L2);
// if (dbDistance < dbMinimum)
// {
// dbMinimum = dbDistance;
// nCodebookIndex = k;
// }
// }
int nCodebookIndex = pszLabels->data.i[nIndex++]; //找到第i幅圖像中第j個IP在Codebook中的索引值nCodebookIndex
++m_pszHistogram[i][nCodebookIndex]; //0<nCodebookIndex<cnClusterNumber;
}
}
//資源清理爬骤,函數(shù)返回
// delete []m_pszHistogram;
// m_pszHistogram = 0;
cvReleaseMat(&pszLabels);
// cvReleaseMat(&pszClusterCenters);
delete []pszDiscriptor;
delete []pDiscriptor;
return nRet;
}
//double descr_dist_sq( struct feature* f1, struct feature* f2 );
/**
 * Adds the Bag-of-words histogram of one image to a caller-supplied buffer.
 *
 * Every SIFT descriptor of the image is assigned to the codebook row with the
 * smallest L2 distance (the first row wins ties) and the matching bin is
 * incremented.
 *
 * @param strImgName      file name of the image.
 * @param pszImgHistogram buffer of at least cnClusterNumber ints. Bins are
 *                        only incremented, never cleared, so the caller must
 *                        zero the buffer first to get a fresh histogram.
 * @return a cnClusterNumber x 1 CV_32SC1 matrix header over pszImgHistogram
 *         (no data is copied, the caller keeps ownership of the buffer), or a
 *         value-initialised CvMat when an argument is missing, no codebook is
 *         set, or the codebook's type or shape does not fit.
 */
CvMat CHistogram::CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[])
{
    if (strImgName.empty() || !m_pCodebook || !pszImgHistogram)
    {
        return CvMat();
    }
    if (CV_MAT_TYPE(m_pCodebook->type) != CV_32FC1 ||
        m_pCodebook->cols != FEATURE_MAX_D ||
        m_pCodebook->rows < 1)
    {
        return CvMat();
    }
    // Never look past the real codebook, nor past the histogram's bin count.
    const int cnCodeNumber =
        (m_pCodebook->rows < cnClusterNumber) ? m_pCodebook->rows : cnClusterNumber;

    CSIFTDiscriptor iImgDisp;
    iImgDisp.SetImgName(strImgName);
    iImgDisp.CalculateSIFT();
    struct feature *pImgFeature = iImgDisp.GetFeatureArray();
    const int cnIPNumber = pImgFeature ? iImgDisp.GetInterestPointNumber() : 0;

    // The descriptor is copied into a float row before being wrapped as
    // CV_32FC1, so it has the same element type and shape as a codebook row
    // and cvNorm can compare the two directly.
    float szDescr[FEATURE_MAX_D];
    CvMat iIP = cvMat(1, FEATURE_MAX_D, CV_32FC1, szDescr);

    for (int i = 0; i < cnIPNumber; ++i)
    {
        for (int k = 0; k < FEATURE_MAX_D; ++k)
        {
            szDescr[k] = static_cast<float>(pImgFeature[i].descr[k]);
        }

        int nCodebookIndex = 0; // index of the code closest to the i-th IP
        double dbMinDistance = 0.0;
        for (int j = 0; j < cnCodeNumber; ++j)
        {
            CvMat iCode;
            cvGetRow(m_pCodebook, &iCode, j);
            const double dbDistance = cvNorm(&iIP, &iCode, CV_L2);
            if (0 == j || dbDistance < dbMinDistance)
            {
                dbMinDistance = dbDistance;
                nCodebookIndex = j;
            }
        }
        ++pszImgHistogram[nCodebookIndex];
    }

    return cvMat(cnClusterNumber, 1, CV_32SC1, pszImgHistogram);
}
/**
 * Returns the training histograms as a matrix with one column per image and
 * one row per code (the transpose of m_pszHistogram).
 *
 * The matrix is stored in the member m_pObservedData and is released by the
 * destructor, so callers must not release it. Each call rebuilds it from the
 * current histogram, so a pointer from an earlier call becomes invalid.
 *
 * @return the cnClusterNumber x m_nImgNumber CV_32SC1 matrix, or 0 when no
 *         histogram has been formed yet.
 */
CvMat *CHistogram::GetObservedData()
{
    if (!m_pszHistogram || m_nImgNumber <= 0)
    {
        return 0;
    }

    // Release the matrix of the previous call instead of leaking it.
    if (m_pObservedData)
    {
        cvReleaseMat(&m_pObservedData);
    }

    CvMat iHistogram;
    cvInitMatHeader(&iHistogram, m_nImgNumber, cnClusterNumber, CV_32SC1, m_pszHistogram);

    // Assign the member itself; a local of the same name would hide it and the
    // destructor would never see the allocation.
    m_pObservedData = cvCreateMat(iHistogram.cols, iHistogram.rows, CV_32SC1);
    cvTranspose(&iHistogram, m_pObservedData);
    return m_pObservedData;
}