為了創建一個目標識別系統，需要從每張圖像中提取特征向量。每張圖像需要有一個識別標志，以用于匹配。
我們用視覺碼本的概念來創建圖像識別標志。在訓練數據集中，碼本實際上就是一個字典，用于提出關于圖像的描述。我們用向量量化方法將很多特征點進行聚類并得出中心點，這些中心點將作為視覺碼本的元素。
訓練數據集
包含3類實例訓練數據集，每一類包含20幅圖像，可以在 http://www.vision.caltech.edu/html-files/archive.html 下載。
處理加載數據集:
def load_training_data(input_folder):
    """Collect the labelled ``.jpg`` images found under ``input_folder``.

    The folder is walked recursively and every file whose name ends with
    ``.jpg`` is recorded. The name of the directory that directly contains
    a file is used as its class label.

    Directories and file names are visited in sorted order, so the returned
    list is identical from run to run (``os.walk`` itself does not promise
    any particular order). Images of one class therefore always appear as
    one contiguous, stably ordered run.

    Args:
        input_folder: Root folder of the training set, e.g.
            ``'training_images/'`` with one sub-folder per class.

    Returns:
        A list with one dict per image, holding the keys ``'object_class'``
        (name of the containing directory) and ``'image_path'`` (path to
        the file, using ``/`` as separator).

    Raises:
        IOError: If ``input_folder`` is not an existing directory.
    """
    if not os.path.isdir(input_folder):
        raise IOError("The folder " + input_folder + " doesn't exist")

    training_data = []
    for root, dirs, files in os.walk(input_folder):
        # Sorting ``dirs`` in place also fixes the order in which os.walk
        # descends into the sub-folders.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith('.jpg'):
                continue
            # On Windows the joined path looks like
            # 'training_images/airplanes\\0001.jpg'; normalise it to
            # forward slashes so the label can be split off uniformly.
            filepath = os.path.join(root, filename).replace('\\', '/')
            # The second-to-last path component is the class folder,
            # e.g. 'airplanes'.
            object_class = filepath.split('/')[-2]
            training_data.append({'object_class': object_class,
                                  'image_path': filepath})
    return training_data
提取圖片的特征:
class FeatureBuilder(object):
    """Extract local image features and build the visual codebook.

    Key points are located with the Star detector and then described with
    SIFT at those locations.
    """

    def extract_features(self, img):
        """Return the SIFT descriptors computed at the Star key points of ``img``.

        Args:
            img: Image as accepted by ``StarFeatureDetector.detect`` and
                ``compute_sift_features``.

        Returns:
            The descriptor array produced by ``compute_sift_features``
            (the original author notes it is a ``numpy.ndarray``).
            NOTE(review): OpenCV appears to return ``None`` when there are
            no key points -- callers should be prepared for that.
        """
        # Detect the key points with the Star detector ...
        keypoints = StarFeatureDetector().detect(img)
        # ... and describe them with SIFT.
        keypoints, feature_vectors = compute_sift_features(img, keypoints)
        return feature_vectors

    @staticmethod
    def _select_samples(input_map, max_samples):
        """Yield at most ``max_samples`` items of each consecutive class run.

        Yields ``(item, quota_reached)`` pairs, where ``quota_reached`` is
        True exactly for the item that fills the quota of its class.

        The counter is reset every time the class label changes, so a class
        with fewer than ``max_samples`` images does not reduce the number
        of samples taken from the class that follows it.
        """
        count = 0
        cur_class = None
        for item in input_map:
            if item['object_class'] != cur_class:
                cur_class = item['object_class']
                count = 0
            if count >= max_samples:
                # Quota for this class is already used up.
                continue
            count += 1
            yield item, count == max_samples

    def get_codewords(self, input_map, scaling_size, max_samples=12):
        """Cluster the descriptors of a sample of the training images.

        Args:
            input_map: List of training items, each a dict such as
                ``{'image_path': 'training_images/airplanes/0001.jpg',
                'object_class': 'airplanes'}``. Items of the same class
                are expected to be adjacent.
            scaling_size: Target size handed to ``resize_image``.
            max_samples: Maximum number of images used per class; further
                images of that class are skipped.

        Returns:
            The ``(kmeans, centroids)`` pair returned by
            ``BagOfWords().cluster``.
        """
        # Descriptors of all sampled images, pooled for clustering.
        keypoints_all = []

        for item, quota_reached in self._select_samples(input_map, max_samples):
            if quota_reached:
                print("Built centroids for", item['object_class'])

            img = cv2.imread(item['image_path'])
            if img is None:
                # cv2.imread reports an unreadable file by returning None.
                print("Skipping unreadable image", item['image_path'])
                continue
            img = resize_image(img, scaling_size)

            feature_vectors = self.extract_features(img)
            # An image without key points contributes nothing; guard
            # against a missing descriptor array instead of crashing.
            if feature_vectors is not None:
                keypoints_all.extend(feature_vectors)

        # Quantise the pooled descriptors into the codebook.
        kmeans, centroids = BagOfWords().cluster(keypoints_all)
        return kmeans, centroids
定義一個類來處理詞袋模型和向量量化
class BagOfWords(object):
    """Bag-of-visual-words model based on k-means vector quantisation."""

    def __init__(self, num_clusters=32):
        """Store the clustering settings.

        Args:
            num_clusters: Number of clusters, i.e. the codebook size and
                the length of the histograms built by this class.
        """
        # Not read anywhere in this class; presumably the SIFT descriptor
        # length -- kept because external code may rely on the attribute.
        self.num_dims = 128
        self.num_clusters = num_clusters
        # Passed to KMeans as ``n_init`` (number of restarts).
        self.num_retries = 10

    def cluster(self, datapoints):
        """Quantise ``datapoints`` with k-means.

        Args:
            datapoints: Feature vectors to cluster, one per entry.

        Returns:
            A ``(kmeans, centroids)`` tuple: the fitted ``KMeans`` object
            and its ``cluster_centers_``.
        """
        kmeans = KMeans(n_clusters=self.num_clusters,
                        n_init=max(self.num_retries, 1),
                        max_iter=10, tol=1.0)
        res = kmeans.fit(datapoints)
        # The cluster centres are the code words.
        centroids = res.cluster_centers_
        return kmeans, centroids

    def normalize(self, input_data):
        """Scale ``input_data`` so that its entries sum to one.

        If the sum is not positive the input is returned unchanged.
        """
        sum_input = np.sum(input_data)
        if sum_input > 0:
            return input_data / sum_input
        return input_data

    def _build_histogram(self, labels):
        """Count the cluster labels into a ``(1, n_bins)`` float row vector.

        ``n_bins`` is ``self.num_clusters``, or larger if a label beyond
        that range occurs.
        """
        counts = np.bincount(labels, minlength=self.num_clusters)
        return counts.astype(np.float64).reshape(1, -1)

    def construct_feature(self, img, kmeans, centroids):
        """Build the normalised code-word histogram of one image.

        Args:
            img: Image to describe.
            kmeans: Fitted clustering object providing ``predict``.
            centroids: Unused here; kept so existing callers keep working.

        Returns:
            A normalised ``(1, n_bins)`` histogram, or ``None`` when no
            descriptors could be computed for the image (callers such as
            ``get_feature_map`` already test for ``None``).
        """
        # Key points from the Star detector, described with SIFT.
        keypoints = StarFeatureDetector().detect(img)
        keypoints, feature_vectors = compute_sift_features(img, keypoints)

        # No key points -> nothing to quantise; report it instead of
        # failing inside kmeans.predict.
        if feature_vectors is None or len(feature_vectors) == 0:
            return None

        # Assign every descriptor to its nearest code word.
        labels = kmeans.predict(feature_vectors)
        return self.normalize(self._build_histogram(labels))
輸入圖像提取特征然后映射到某一類
def compute_sift_features(img, keypoints):
    """Compute SIFT descriptors for ``keypoints`` on ``img``.

    Args:
        img: Input image. A colour image is converted from BGR to
            greyscale; an image that is already two-dimensional is used
            as is. (Assumes a NumPy array such as ``cv2.imread`` returns.)
        keypoints: Key points at which to compute descriptors.

    Returns:
        The ``(keypoints, descriptors)`` pair returned by SIFT's
        ``compute``.

    Raises:
        TypeError: If ``img`` is ``None``.
    """
    if img is None:
        raise TypeError('Invalid input image')

    # Single-channel input needs no colour conversion (cvtColor with
    # BGR2GRAY would reject it).
    if img.ndim == 2:
        img_gray = img
    else:
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Newer OpenCV builds expose SIFT in the main module; older ones only
    # through the contrib ``xfeatures2d`` module.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    keypoints, descriptors = sift.compute(img_gray, keypoints)
    return keypoints, descriptors
定義一個函數，為每幅輸入圖像構建特征向量并生成特征映射 (get_feature_map)
def get_feature_map(input_map, kmeans, centroids, scaling_size):
    """Build the bag-of-words feature vector of every image in ``input_map``.

    Args:
        input_map: List of dicts with the keys ``'object_class'`` and
            ``'image_path'``.
        kmeans: Fitted clustering object, passed on to
            ``BagOfWords.construct_feature``.
        centroids: Code words, passed on to
            ``BagOfWords.construct_feature``.
        scaling_size: Target size handed to ``resize_image``.

    Returns:
        A list of dicts with the keys ``'object_class'`` and
        ``'feature_vector'``. Images that cannot be read, or for which no
        feature vector is produced, are left out.
    """
    feature_map = []
    # One helper instance is enough for all images.
    bag_of_words = BagOfWords()

    for item in input_map:
        print("Extracting features for", item['image_path'])
        img = cv2.imread(item['image_path'])
        if img is None:
            # cv2.imread reports an unreadable file by returning None;
            # skip it rather than crash inside resize_image.
            print("Skipping unreadable image", item['image_path'])
            continue
        img = resize_image(img, scaling_size)

        feature_vector = bag_of_words.construct_feature(img, kmeans, centroids)
        if feature_vector is not None:
            feature_map.append({'object_class': item['object_class'],
                                'feature_vector': feature_vector})
    return feature_map
resize_image
def resize_image(input_img, new_size):
    """Resize ``input_img`` so that its shorter side equals ``new_size``.

    The aspect ratio is kept: both sides are multiplied by
    ``new_size / min(height, width)``.

    Args:
        input_img: Image array whose ``shape`` starts with
            ``(height, width)``.
        new_size: Desired length of the shorter side, in pixels.

    Returns:
        The image returned by ``cv2.resize``.

    Raises:
        TypeError: If ``input_img`` is ``None`` (for example because the
            file could not be read).
    """
    if input_img is None:
        raise TypeError('Invalid input image')

    h, w = input_img.shape[:2]
    # The shorter side is set to new_size directly: computing it as
    # int(side * (new_size / side)) can truncate to new_size - 1 because
    # of floating-point rounding. The longer side is scaled and truncated.
    if w < h:
        new_shape = (int(new_size), int(h * (new_size / float(w))))
    else:
        new_shape = (int(w * (new_size / float(h))), int(new_size))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(input_img, new_shape)
Star檢測器
class StarFeatureDetector(object):
    """Thin wrapper around the Star key-point detector from OpenCV contrib."""

    def __init__(self):
        """Create the underlying detector via ``cv2.xfeatures2d``."""
        self.detector = cv2.xfeatures2d.StarDetector_create()

    def detect(self, img):
        """Run the wrapped detector on ``img`` and return its result."""
        found = self.detector.detect(img)
        return found
主文件import
# -*- coding:utf8 -*-
import os
import sys
import argparse
# import cPickle as pickle
import pickle as pickle
import json
import cv2
import numpy as np
from sklearn.cluster import KMeans
- 在pycharm里編輯輸入信息 方便調試
if __name__ == '__main__':
    # Hard-coded settings so the script can be run and debugged directly
    # from an IDE, without command-line arguments.
    data_folder = 'training_images/'
    scaling_size = 200
    codebook_file = 'codebook/9_8.pkl'
    feature_map_file = 'feature_map/9_8.pkl'

    training_data = load_training_data(data_folder)

    # Build the visual codebook
    print("====== Building visual codebook ======")
    kmeans, centroids = FeatureBuilder().get_codewords(training_data, scaling_size)
    if codebook_file:
        # Make sure the target folder exists; otherwise open() fails only
        # after the expensive clustering step has finished.
        codebook_dir = os.path.dirname(codebook_file)
        if codebook_dir:
            os.makedirs(codebook_dir, exist_ok=True)
        with open(codebook_file, 'wb') as f:
            pickle.dump((kmeans, centroids), f)

    # Extract features from input images
    print("\n====== Building the feature map ======")
    feature_map = get_feature_map(training_data, kmeans, centroids, scaling_size)
    if feature_map_file:
        feature_map_dir = os.path.dirname(feature_map_file)
        if feature_map_dir:
            os.makedirs(feature_map_dir, exist_ok=True)
        with open(feature_map_file, 'wb') as f:
            pickle.dump(feature_map, f)
- 命令行方式運行文件
# 定義命令行輸入方式
def build_arg_parser():
    """Create the command-line parser for the feature-extraction script.

    Returns:
        An ``argparse.ArgumentParser`` with the required options
        ``--data-folder``, ``--codebook-file`` and ``--feature-map-file``
        and the optional integer ``--scaling-size`` (default 200).
    """
    parser = argparse.ArgumentParser(
        description='Extract features from a given set of images')
    parser.add_argument(
        "--data-folder", dest="data_folder", required=True,
        help="Folder containing the training images organized in subfolders")
    parser.add_argument(
        "--codebook-file", dest='codebook_file', required=True,
        help="Output file where the codebook will be stored")
    parser.add_argument(
        "--feature-map-file", dest='feature_map_file', required=True,
        help="Output file where the feature map will be stored")
    # resize_image scales by new_size / min(height, width), i.e. it is the
    # shorter side that ends up at this size -- the help text says so.
    parser.add_argument(
        "--scaling-size", dest="scaling_size", type=int, default=200,
        help="Scales the shorter dimension of the image to this size.")
    return parser
if __name__ == '__main__':
    # Command-line entry point: all settings come from the arguments.
    args = build_arg_parser().parse_args()
    data_folder = args.data_folder
    scaling_size = args.scaling_size

    # Load the training data
    training_data = load_training_data(data_folder)

    # Build the visual codebook
    print("====== Building visual codebook ======")
    kmeans, centroids = FeatureBuilder().get_codewords(training_data, scaling_size)
    if args.codebook_file:
        # Make sure the target folder exists; otherwise open() fails only
        # after the expensive clustering step has finished.
        codebook_dir = os.path.dirname(args.codebook_file)
        if codebook_dir:
            os.makedirs(codebook_dir, exist_ok=True)
        with open(args.codebook_file, 'wb') as f:
            pickle.dump((kmeans, centroids), f)

    # Extract features from input images
    print("\n====== Building the feature map ======")
    feature_map = get_feature_map(training_data, kmeans, centroids, scaling_size)
    if args.feature_map_file:
        feature_map_dir = os.path.dirname(args.feature_map_file)
        if feature_map_dir:
            os.makedirs(feature_map_dir, exist_ok=True)
        with open(args.feature_map_file, 'wb') as f:
            pickle.dump(feature_map, f)