HOG(Histogram of Oriented Gradients)是一種表示圖像特征量的方法。特征量是表示圖像的狀態等的向量集合。
- 在圖像識別(圖像是什么)和檢測(物體在圖像中的哪個位置)中,我們需要:
- 從圖像中獲取特征量(特征提取);
- 基于特征量進行識別和檢測。
- 通過以下算法獲得HOG:
- 圖像灰度化之后,在x方向和y方向上求出亮度的梯度:
- x方向:$g_x(x, y) = I(x+1, y) - I(x-1, y)$
- y方向:$g_y(x, y) = I(x, y+1) - I(x, y-1)$
- 從gx和gy確定梯度幅值和梯度方向:
- 梯度幅值:$mag(x, y) = \sqrt{g_x(x, y)^2 + g_y(x, y)^2}$
- 梯度方向:$ang(x, y) = \arctan\frac{g_y(x, y)}{g_x(x, y)}$
- 將梯度方向[0,180]進行9等分量化。也就是說,對于[0,20]量化為 index 0,對于[20,40]量化為 index 1,依此類推。
- 將圖像劃分為若干個N×N像素的區域(該區域稱為 cell),并作出 cell 內步驟3得到的 index 的直方圖。
- C x C個 cell 被稱為一個 block。對每個 block 內的 cell 的直方圖通過下面的式子進行歸一化:$h(t) = \frac{h(t)}{\sqrt{\sum h(t)^2 + \epsilon}}$。由于歸一化過程是窗口一次移動一個 cell 來完成的,因此一個 cell 會被歸一化多次,通常 $\epsilon = 1$。
- 以上,求出 HOG 特征值。
- 綜上來說,前三步還是比較簡單的,非常常規:圖像轉灰度,然后求出x方向y方向上的梯度,結合x方向和y方向的梯度,求出梯度幅值矩陣和梯度方向矩陣,對梯度方向矩陣進行量化,將方向歸一到0-8九個值
- 第四步有些難度,將圖像按 $8 \times 8$ 的塊進行切分,比如原圖像為高240寬240的圖像,切分后就變成高上有30寬上有30的900個小塊,每個小塊上結合梯度幅度圖和量化后的梯度方向圖,將梯度幅度歸類到0-8對應的九個梯度方向上,這樣很類似直方圖歸一到0-255的256個位置,不過這里是9個位置
- 第五步同樣理解起來很累,在第四步的基礎上,以3X3的九個小塊作為一個單元進行歸一化,就是按照公式把第四個步驟中的每個小塊在其范圍內的9個塊中進行歸一
- 第六步輪到畫方向了,根據梯度方向計算出初始坐標、終點坐標,設置線寬、線的顏色,然后開始畫線
import cv2#導(dǎo)入opencv\numpy\matplotlib庫
import numpy as np
import matplotlib.pyplot as plt
# get HOG step1
def HOG_step1(img):
    """Compute per-pixel gradient magnitude and quantized gradient direction.

    First stage of HOG: grayscale conversion, x/y differences,
    magnitude/angle computation and quantization of the angle into 9 bins.

    Args:
        img: array of shape (H, W, 3); channels 0/1/2 are weighted as B/G/R.

    Returns:
        Tuple ``(magnitude, gradient_quantized)`` of two (H, W) arrays: the
        gradient magnitude and the integer direction bin (0-8) of each pixel.
    """

    def BGR2GRAY(img):
        # Weighted sum of the R (index 2), G (index 1) and B (index 0) channels.
        return 0.2126 * img[..., 2] + 0.7152 * img[..., 1] + 0.0722 * img[..., 0]

    def get_gradXY(gray):
        H, W = gray.shape
        # 'edge' mode replicates the border pixels (it does not pad with 0),
        # so every pixel has a neighbour on both sides.
        gray = np.pad(gray, (1, 1), 'edge')
        # Differences between the next and the previous neighbour.
        gx = gray[1:H + 1, 2:] - gray[1:H + 1, :W]
        gy = gray[2:, 1:W + 1] - gray[:H, 1:W + 1]
        # gx is the divisor of gy / gx below; move exact zeros away from 0.
        gx[gx == 0] = 1e-6
        return gx, gy

    def get_MagGrad(gx, gy):
        magnitude = np.sqrt(gx ** 2 + gy ** 2)
        gradient = np.arctan(gy / gx)
        # arctan yields angles in (-pi/2, pi/2); shift the negative ones by pi
        # so that all directions are non-negative.
        gradient[gradient < 0] += np.pi
        return magnitude, gradient

    def quantization(gradient):
        # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
        gradient_quantized = np.zeros_like(gradient, dtype=int)
        d = np.pi / 9  # bin width: 20 degrees
        for i in range(9):
            # Both bounds are inclusive, so an angle lying exactly on a bin
            # edge ends up in the higher bin (later iterations overwrite).
            in_bin = (gradient >= d * i) & (gradient <= d * (i + 1))
            gradient_quantized[in_bin] = i
        return gradient_quantized

    # 1. BGR -> gray -> x/y gradients
    gray = BGR2GRAY(img)
    gx, gy = get_gradXY(gray)
    # 2. gradient magnitude and angle
    magnitude, gradient = get_MagGrad(gx, gy)
    # 3. quantize the angle into bins 0-8
    gradient_quantized = quantization(gradient)
    return magnitude, gradient_quantized
# Read the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail here with a clear message.
img = cv2.imread("123.jpg")
if img is None:
    raise FileNotFoundError('cv2.imread could not read "123.jpg"')
img = img.astype(np.float32)
# HOG step 1: gradient magnitude and quantized direction per pixel
magnitude, gradient_quantized = HOG_step1(img)
# Scale the magnitude to 0-255 and write it to a file
_magnitude = (magnitude / magnitude.max() * 255).astype(np.uint8)
cv2.imwrite("out_mag.jpg", _magnitude)
# Render the quantized direction as a colour image
H, W = img.shape[:2]
out = np.zeros((H, W, 3), dtype=np.uint8)
# One colour per direction bin (bins 0-8)
C = [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0], [255, 0, 255], [0, 255, 255],
     [127, 127, 0], [127, 0, 127], [0, 127, 127]]
# Paint every pixel with the colour of its bin
for i in range(9):
    out[gradient_quantized == i] = C[i]
cv2.imwrite("out_gra.jpg", out)
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
總結(jié)一下上述代碼
- 圖像轉(zhuǎn)灰度
- 圖像進(jìn)行x方向和y方向上的一階差分
- 由步驟2得到的兩個矩陣構(gòu)造出幅度矩陣和梯度方向矩陣
- 對梯度方向矩陣進(jìn)行量化柑肴,給定0-9對應(yīng)的標(biāo)簽
- 為0-9對應(yīng)的標(biāo)簽賦予不同的顏色顯示
import cv2
import numpy as np
import matplotlib.pyplot as plt
# get HOG step2
def HOG_step2(img):
    """Compute the per-cell gradient-direction histogram of an image.

    Stages: grayscale conversion, x/y differences, magnitude/angle,
    quantization of the angle into 9 bins, and accumulation of the magnitude
    into one 9-bin histogram per N x N pixel cell (N = 8).

    Args:
        img: array of shape (H, W, 3); channels 0/1/2 are weighted as B/G/R.

    Returns:
        float32 array of shape (H // 8, W // 8, 9): for every cell, the sum of
        gradient magnitudes falling into each direction bin.
    """

    def BGR2GRAY(img):
        # Weighted sum of the R (index 2), G (index 1) and B (index 0) channels.
        return 0.2126 * img[..., 2] + 0.7152 * img[..., 1] + 0.0722 * img[..., 0]

    def get_gradXY(gray):
        H, W = gray.shape
        # 'edge' mode replicates the border pixels so every pixel has a
        # neighbour on both sides.
        gray = np.pad(gray, (1, 1), 'edge')
        # Differences between the next and the previous neighbour.
        gx = gray[1:H + 1, 2:] - gray[1:H + 1, :W]
        gy = gray[2:, 1:W + 1] - gray[:H, 1:W + 1]
        # gx is the divisor of gy / gx below; move exact zeros away from 0.
        gx[gx == 0] = 1e-6
        return gx, gy

    def get_MagGrad(gx, gy):
        magnitude = np.sqrt(gx ** 2 + gy ** 2)
        gradient = np.arctan(gy / gx)
        # arctan yields angles in (-pi/2, pi/2); shift the negative ones by pi
        # so that all directions are non-negative.
        gradient[gradient < 0] += np.pi
        return magnitude, gradient

    def quantization(gradient):
        # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
        gradient_quantized = np.zeros_like(gradient, dtype=int)
        d = np.pi / 9  # bin width: 20 degrees
        for i in range(9):
            # Both bounds are inclusive, so an angle lying exactly on a bin
            # edge ends up in the higher bin (later iterations overwrite).
            in_bin = (gradient >= d * i) & (gradient <= d * (i + 1))
            gradient_quantized[in_bin] = i
        return gradient_quantized

    def gradient_histogram(gradient_quantized, magnitude, N=8):
        H, W = magnitude.shape
        # Number of complete cells; leftover rows/columns are ignored.
        cell_N_H = H // N
        cell_N_W = W // N
        histogram = np.zeros((cell_N_H, cell_N_W, 9), dtype=np.float32)
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # Cell (y, x) covers the N x N window starting at
                # (y * N, x * N). The stride must be N (not a fixed 4),
                # otherwise neighbouring cells overlap.
                bins = gradient_quantized[y * N:(y + 1) * N, x * N:(x + 1) * N]
                mags = magnitude[y * N:(y + 1) * N, x * N:(x + 1) * N]
                for b in range(9):
                    histogram[y, x, b] = mags[bins == b].sum()
        return histogram

    # 1. BGR -> gray -> x/y gradients
    gray = BGR2GRAY(img)
    gx, gy = get_gradXY(gray)
    # 2. gradient magnitude and angle
    magnitude, gradient = get_MagGrad(gx, gy)
    # 3. quantize the angle into bins 0-8
    gradient_quantized = quantization(gradient)
    # 4. per-cell histogram of magnitudes by direction bin
    histogram = gradient_histogram(gradient_quantized, magnitude)
    return histogram
# Read the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail here with a clear message.
img = cv2.imread("123.jpg")
if img is None:
    raise FileNotFoundError('cv2.imread could not read "123.jpg"')
img = img.astype(np.float32)
# HOG step 2: per-cell histogram over the 9 direction bins
histogram = HOG_step2(img)
# Plot each direction bin as its own image in a 3 x 3 grid
for i in range(9):
    plt.subplot(3, 3, i + 1)
    plt.imshow(histogram[..., i])
    plt.axis('off')
    plt.xticks(color="None")
    plt.yticks(color="None")
plt.savefig("out.png")
plt.show()
對上述代碼總結如下
- 圖像轉灰度
- 圖像進行x方向和y方向上的一階差分
- 由步驟2得到的兩個矩陣構造出幅度矩陣和梯度方向矩陣
- 對梯度方向矩陣進行量化,給定0-8對應的標簽
- 取N=8,8×8個像素為一個 cell,將每個 cell 的梯度幅值加到梯度方向的index處,因為一共有九個梯度方向,因此histogram第三個維度大小為9。
import cv2
import numpy as np
import matplotlib.pyplot as plt
# get HOG
def HOG(img):
    """Compute the block-normalized HOG (histogram of oriented gradients).

    Stages: grayscale conversion, x/y differences, magnitude/angle,
    quantization of the angle into 9 bins, per-cell histograms (8 x 8 pixel
    cells) and normalization of every cell over its 3 x 3 cell neighbourhood.

    Args:
        img: array of shape (H, W, 3); channels 0/1/2 are weighted as B/G/R.

    Returns:
        float32 array of shape (H // 8, W // 8, 9) with the normalized
        histogram of every cell.
    """

    def BGR2GRAY(img):
        # Weighted sum of the R (index 2), G (index 1) and B (index 0) channels.
        return 0.2126 * img[..., 2] + 0.7152 * img[..., 1] + 0.0722 * img[..., 0]

    def get_gradXY(gray):
        H, W = gray.shape
        # 'edge' mode replicates the border pixels so every pixel has a
        # neighbour on both sides.
        gray = np.pad(gray, (1, 1), 'edge')
        # Differences between the next and the previous neighbour.
        gx = gray[1:H + 1, 2:] - gray[1:H + 1, :W]
        gy = gray[2:, 1:W + 1] - gray[:H, 1:W + 1]
        # gx is the divisor of gy / gx below; move exact zeros away from 0.
        gx[gx == 0] = 1e-6
        return gx, gy

    def get_MagGrad(gx, gy):
        magnitude = np.sqrt(gx ** 2 + gy ** 2)
        gradient = np.arctan(gy / gx)
        # arctan yields angles in (-pi/2, pi/2); shift the negative ones by pi
        # so that all directions are non-negative.
        gradient[gradient < 0] += np.pi
        return magnitude, gradient

    def quantization(gradient):
        # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
        gradient_quantized = np.zeros_like(gradient, dtype=int)
        d = np.pi / 9  # bin width: 20 degrees
        for i in range(9):
            # Both bounds are inclusive, so an angle lying exactly on a bin
            # edge ends up in the higher bin (later iterations overwrite).
            in_bin = (gradient >= d * i) & (gradient <= d * (i + 1))
            gradient_quantized[in_bin] = i
        return gradient_quantized

    def gradient_histogram(gradient_quantized, magnitude, N=8):
        H, W = magnitude.shape
        # Number of complete cells; leftover rows/columns are ignored.
        cell_N_H = H // N
        cell_N_W = W // N
        histogram = np.zeros((cell_N_H, cell_N_W, 9), dtype=np.float32)
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # Cell (y, x) covers the N x N window starting at
                # (y * N, x * N). The stride must be N (not a fixed 4),
                # otherwise neighbouring cells overlap.
                bins = gradient_quantized[y * N:(y + 1) * N, x * N:(x + 1) * N]
                mags = magnitude[y * N:(y + 1) * N, x * N:(x + 1) * N]
                for b in range(9):
                    histogram[y, x, b] = mags[bins == b].sum()
        return histogram

    def normalization(histogram, C=3, epsilon=1):
        cell_N_H, cell_N_W, _ = histogram.shape
        r = C // 2  # cells on each side of the centre cell (1 for C=3)
        # Read the neighbourhood from an untouched copy: normalizing in place
        # would feed already-normalized cells into later blocks.
        raw = histogram.copy()
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # C x C block centred on the cell, clipped at the borders.
                block = raw[max(y - r, 0):min(y + r + 1, cell_N_H),
                            max(x - r, 0):min(x + r + 1, cell_N_W)]
                histogram[y, x] = raw[y, x] / np.sqrt(np.sum(block ** 2) + epsilon)
        return histogram

    # 1. BGR -> gray -> x/y gradients
    gray = BGR2GRAY(img)
    gx, gy = get_gradXY(gray)
    # 2. gradient magnitude and angle
    magnitude, gradient = get_MagGrad(gx, gy)
    # 3. quantize the angle into bins 0-8
    gradient_quantized = quantization(gradient)
    # 4. per-cell histogram of magnitudes by direction bin
    histogram = gradient_histogram(gradient_quantized, magnitude)
    # 5. block normalization
    histogram = normalization(histogram)
    return histogram
# Read the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail here with a clear message.
img = cv2.imread("123.jpg")
if img is None:
    raise FileNotFoundError('cv2.imread could not read "123.jpg"')
img = img.astype(np.float32)
# Normalized HOG of the image
histogram = HOG(img)
# Plot each direction bin as its own image in a 3 x 3 grid
for i in range(9):
    plt.subplot(3, 3, i + 1)
    plt.imshow(histogram[..., i])
    plt.axis('off')
    plt.xticks(color="None")
    plt.yticks(color="None")
plt.savefig("out.png")
plt.show()
最終的完整代碼
import cv2
import numpy as np
import matplotlib.pyplot as plt
# get HOG
def HOG(img):
    """Compute the block-normalized HOG (histogram of oriented gradients).

    Stages: grayscale conversion, x/y differences, magnitude/angle,
    quantization of the angle into 9 bins, per-cell histograms (8 x 8 pixel
    cells) and normalization of every cell over its 3 x 3 cell neighbourhood.

    Args:
        img: array of shape (H, W, 3); channels 0/1/2 are weighted as B/G/R.

    Returns:
        float32 array of shape (H // 8, W // 8, 9) with the normalized
        histogram of every cell.
    """

    def BGR2GRAY(img):
        # Weighted sum of the R (index 2), G (index 1) and B (index 0) channels.
        return 0.2126 * img[..., 2] + 0.7152 * img[..., 1] + 0.0722 * img[..., 0]

    def get_gradXY(gray):
        H, W = gray.shape
        # 'edge' mode replicates the border pixels so every pixel has a
        # neighbour on both sides.
        gray = np.pad(gray, (1, 1), 'edge')
        # Differences between the next and the previous neighbour.
        gx = gray[1:H + 1, 2:] - gray[1:H + 1, :W]
        gy = gray[2:, 1:W + 1] - gray[:H, 1:W + 1]
        # gx is the divisor of gy / gx below; move exact zeros away from 0.
        gx[gx == 0] = 1e-6
        return gx, gy

    def get_MagGrad(gx, gy):
        magnitude = np.sqrt(gx ** 2 + gy ** 2)
        gradient = np.arctan(gy / gx)
        # arctan yields angles in (-pi/2, pi/2); shift the negative ones by pi
        # so that all directions are non-negative.
        gradient[gradient < 0] += np.pi
        return magnitude, gradient

    def quantization(gradient):
        # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
        gradient_quantized = np.zeros_like(gradient, dtype=int)
        d = np.pi / 9  # bin width: 20 degrees
        for i in range(9):
            # Both bounds are inclusive, so an angle lying exactly on a bin
            # edge ends up in the higher bin (later iterations overwrite).
            in_bin = (gradient >= d * i) & (gradient <= d * (i + 1))
            gradient_quantized[in_bin] = i
        return gradient_quantized

    def gradient_histogram(gradient_quantized, magnitude, N=8):
        H, W = magnitude.shape
        # Number of complete cells; leftover rows/columns are ignored.
        cell_N_H = H // N
        cell_N_W = W // N
        histogram = np.zeros((cell_N_H, cell_N_W, 9), dtype=np.float32)
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # Cell (y, x) covers the N x N window starting at
                # (y * N, x * N). The stride must be N (not a fixed 4),
                # otherwise neighbouring cells overlap.
                bins = gradient_quantized[y * N:(y + 1) * N, x * N:(x + 1) * N]
                mags = magnitude[y * N:(y + 1) * N, x * N:(x + 1) * N]
                for b in range(9):
                    histogram[y, x, b] = mags[bins == b].sum()
        return histogram

    def normalization(histogram, C=3, epsilon=1):
        cell_N_H, cell_N_W, _ = histogram.shape
        r = C // 2  # cells on each side of the centre cell (1 for C=3)
        # Read the neighbourhood from an untouched copy: normalizing in place
        # would feed already-normalized cells into later blocks.
        raw = histogram.copy()
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # C x C block centred on the cell, clipped at the borders.
                block = raw[max(y - r, 0):min(y + r + 1, cell_N_H),
                            max(x - r, 0):min(x + r + 1, cell_N_W)]
                histogram[y, x] = raw[y, x] / np.sqrt(np.sum(block ** 2) + epsilon)
        return histogram

    # 1. BGR -> gray -> x/y gradients
    gray = BGR2GRAY(img)
    gx, gy = get_gradXY(gray)
    # 2. gradient magnitude and angle
    magnitude, gradient = get_MagGrad(gx, gy)
    # 3. quantize the angle into bins 0-8
    gradient_quantized = quantization(gradient)
    # 4. per-cell histogram of magnitudes by direction bin
    histogram = gradient_histogram(gradient_quantized, magnitude)
    # 5. block normalization
    histogram = normalization(histogram)
    return histogram
# draw HOG
def draw_HOG(img, histogram):
    """Draw the HOG of every cell as line segments on the grayscale image.

    For each cell, one line per direction bin is drawn through the cell
    centre; its gray level is the bin value relative to the cell's largest bin.

    Args:
        img: array of shape (H, W, 3); channels 0/1/2 are weighted as B/G/R.
        histogram: array of shape (cells_y, cells_x, 9) with one 9-bin
            histogram per 8 x 8 pixel cell.

    Returns:
        uint8 array of shape (H, W): the grayscale image with the lines drawn.
    """

    def BGR2GRAY(img):
        # Weighted sum of the R (index 2), G (index 1) and B (index 0) channels.
        return 0.2126 * img[..., 2] + 0.7152 * img[..., 1] + 0.0722 * img[..., 0]

    def draw(gray, histogram, N=8):
        cell_N_H, cell_N_W, _ = histogram.shape
        # Draw on the complete grayscale image. Slicing it with [1:H+1, 1:W+1]
        # would drop the first row and column and shift the picture relative
        # to the cell grid.
        out = gray.copy().astype(np.uint8)
        # Half-length of every line, measured from the cell centre.
        half = N // 2 - 1
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # Centre pixel of the cell.
                cx = x * N + N // 2
                cy = y * N + N // 2
                peak = histogram[y, x].max()
                if not peak > 0:
                    # Empty cell: scaling by the peak would divide by zero.
                    continue
                # Scale so that the strongest bin of the cell becomes 1.
                h = histogram[y, x] / peak
                for b in range(9):
                    # Centre angle of bin b: 10, 30, ..., 170 degrees.
                    angle = (20 * b + 10) / 180. * np.pi
                    dx = np.sin(angle) * half
                    dy = np.cos(angle) * half
                    # The two end points lie symmetrically around the centre.
                    rx, ry = int(dx + cx), int(dy + cy)
                    lx, ly = int(-dx + cx), int(-dy + cy)
                    # Gray level of the line encodes the relative bin value.
                    level = int(255. * h[b])
                    cv2.line(out, (lx, ly), (rx, ry), (level, level, level), thickness=1)
        return out

    gray = BGR2GRAY(img)
    out = draw(gray, histogram)
    return out
# Read the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail here with a clear message.
img = cv2.imread("123.jpg")
if img is None:
    raise FileNotFoundError('cv2.imread could not read "123.jpg"')
img = img.astype(np.float32)
# Normalized HOG of the image
histogram = HOG(img)
# Overlay the HOG on the grayscale image
out = draw_HOG(img, histogram)
# Save and display the result
cv2.imwrite("out.jpg", out)
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.destroyAllWindows()
- 綜上來說,前三步還是比較簡單的,非常常規:圖像轉灰度,然后求出x方向y方向上的梯度,結合x方向和y方向的梯度,求出梯度幅值矩陣和梯度方向矩陣,對梯度方向矩陣進行量化,將方向歸一到0-8九個值
- 第四步有些難度,將圖像按 $8 \times 8$ 的塊進行切分,比如原圖像為高240寬240的圖像,切分后就變成高上有30寬上有30的900個小塊,每個小塊上結合梯度幅度圖和量化后的梯度方向圖,將梯度幅度歸類到0-8對應的九個梯度方向上,這樣很類似直方圖歸一到0-255的256個位置,不過這里是9個位置
- 第五步同樣理解起來很累,在第四步的基礎上,以3X3的九個小塊作為一個單元進行歸一化,就是按照公式把第四個步驟中的每個小塊在其范圍內的9個塊中進行歸一
- 第六步輪到畫方向了,根據梯度方向計算出初始坐標、終點坐標,設置線寬、線的顏色,然后開始畫線
色彩追蹤
- 色彩追蹤是提取特定顏色的區域的方法。
- 然而,由于在 RGB 色彩空間內顏色有 $256^3$ 種,因此十分困難(或者說手動提取相當困難),因此進行 HSV 變換。
- HSV 變換在之前提到過,是將 RGB 變換到色相(Hue)、飽和度(Saturation)、明度(Value)的方法。
- 飽和度越小越白,飽和度越大顏色越濃烈,0≤S≤1;
- 明度數值越高越接近白色,數值越低越接近黑色(0≤V≤1);
- 色相:將顏色使用0到360度表示,具體色相與數值按下表對應
紅 | 黃 | 綠 | 青色 | 藍色 | 品紅 | 紅 |
---|---|---|---|---|---|---|
0° | 60° | 120° | 180° | 240° | 300° | 360° |
- 也就是說,為了追蹤藍色,可以在進行 HSV 轉換后提取其中180≤H≤260的位置,將其變為255。
def BGR2HSV(_img):
    """Convert a BGR image with values in 0-255 to HSV.

    Args:
        _img: array of shape (H, W, 3); channels 0/1/2 are read as B/G/R.

    Returns:
        float32 array of shape (H, W, 3): channel 0 is the hue in degrees,
        channel 1 the saturation (max - min) and channel 2 the value (max),
        the latter two computed on the image scaled by 1/255. Pixels whose
        three channels are equal get hue 0.
    """
    img = np.asarray(_img) / 255.
    hsv = np.zeros_like(img, dtype=np.float32)

    max_v = np.max(img, axis=2)
    min_v = np.min(img, axis=2)
    min_arg = np.argmin(img, axis=2)
    chroma = max_v - min_v

    b, g, r = img[..., 0], img[..., 1], img[..., 2]

    # Hue. Only pixels with chroma > 0 are computed: for max == min the
    # formulas below would evaluate 0 / 0 and store NaN, so those pixels
    # keep the initial hue of 0.
    hue = np.zeros_like(chroma)
    colored = chroma > 0
    # minimum is B
    ind = colored & (min_arg == 0)
    hue[ind] = 60 * (g[ind] - r[ind]) / chroma[ind] + 60
    # minimum is R
    ind = colored & (min_arg == 2)
    hue[ind] = 60 * (b[ind] - g[ind]) / chroma[ind] + 180
    # minimum is G
    ind = colored & (min_arg == 1)
    hue[ind] = 60 * (r[ind] - b[ind]) / chroma[ind] + 300

    hsv[..., 0] = hue
    # S
    hsv[..., 1] = chroma
    # V
    hsv[..., 2] = max_v
    return hsv
# make mask
def get_mask(hsv):
    """Build a mask that is 255 where the hue lies strictly between 180 and 260.

    Args:
        hsv: array whose last axis holds the hue at index 0.

    Returns:
        Array shaped and typed like the hue channel: 255 inside the hue
        range, 0 everywhere else.
    """
    hue = hsv[..., 0]
    # Both bounds are exclusive.
    selected = (hue > 180) & (hue < 260)
    mask = np.zeros_like(hue)
    mask[selected] = 255
    return mask
# Read the input image and convert it to float32
img = cv2.imread("imori.jpg").astype(np.float32)
# BGR -> HSV
hsv = BGR2HSV(img)
# Colour tracking: build the mask from the hue channel
mask = get_mask(hsv)
# Convert the mask to uint8 so it can be written and shown as an image
out = mask.astype(np.uint8)
# Save and display the result
cv2.imwrite("out.png", out)
# NOTE(review): no cv2.waitKey call is visible after imshow in this part of
# the file; confirm one follows so the window stays open.
cv2.imshow("result", out)