標簽(空格分隔): Linux 深度學習
前面，我們已經成功跑起來faster-rcnn，并且自己動手訓練了PascalVOC2007圖像集，但是筆者接下來繼續沿著官方文件跑PascalVOC2012，訓練到第二階段的某個點，程序就開始報錯退出。查看logs發現，找不到圖像的Annotation文件。遂放棄，轉而生成自己的圖像集。
圖像收集
這里筆者需要先完成畢業設計，所以圖像集設計的種類不多，也就三類。畢竟PascalVOC一開始也只有四類呢！看了這么多經驗帖，總結需要注意以下幾個問題：文件名不要大寫，后綴只能是.jpg或.JPEG，圖像不要長得太過“瘦長”。這里為了重新命名收集來的文件，筆者自己寫了一段Python腳本，變量命名注釋極不規范，而且并不能識別某些后綴是jpg但并不是jpg的“假圖”，大家看看就好，勿噴。
# -*-coding:utf-8-*-
import os
from PIL import Image
import argparse
'''
This script renames JPEG images downloaded from different websites.
Each file is renamed to <class digit><5-digit counter>.jpg, e.g. 000001.jpg
'''
def readimage(FileDir):
    """Return the names of the regular files directly inside *FileDir*.

    Sub-directories are skipped and only base names (not full paths) are
    returned.  The result is sorted so callers process the files in a
    reproducible order; ``os.listdir`` itself gives no ordering guarantee.

    :param FileDir: path of the directory to scan.
    :returns: sorted list of file names.
    :raises OSError: if *FileDir* does not exist or cannot be listed.
    """
    return sorted(
        name for name in os.listdir(FileDir)
        if os.path.isfile(os.path.join(FileDir, name))
    )
def RenameFiles(FileDir, count=1):
    """Rename the JPEG files of one class directory to ``<class id><counter>.jpg``.

    The class id is looked up from the directory name ('kiss' -> 0,
    'hug' -> 1, 'hands' -> 2) and the counter is zero-padded to at least
    five digits, so the first file of 'kiss' becomes ``000001.jpg``.
    Files are taken in the order returned by ``readimage``; processing
    stops at the first file whose extension is not .jpg/.jpeg
    (case-insensitive), as the original script did.

    :param FileDir: class directory; only its last path component is used
        to pick the class, so 'kiss', './kiss' and 'data/kiss/' all work.
    :param count: number given to the first renamed file.
    :raises ValueError: if the directory name is not a known class.
    :raises OSError: if a rename would overwrite a different existing file.
    """
    class_ids = {'kiss': 0, 'hug': 1, 'hands': 2}
    jpeg_exts = ('.jpg', '.jpeg')

    # Resolve the class once, up front: the original left `clsnum` unbound
    # (and crashed on the first image) for any unexpected directory spelling.
    dirname = os.path.basename(os.path.normpath(FileDir))
    if dirname not in class_ids:
        raise ValueError(
            "unknown dataset directory %r, expected one of %s"
            % (FileDir, sorted(class_ids)))
    clsnum = class_ids[dirname]

    for image in readimage(FileDir):
        extension = os.path.splitext(image)[1].lower()
        if extension not in jpeg_exts:
            print("Exist non-jpeg files: " + image)
            break

        # zfill pads like the old '0000' + n slice but never truncates,
        # so counters above 99999 no longer collapse onto the same name.
        newimagename = str(clsnum) + str(count).zfill(5) + '.jpg'
        count = count + 1
        print(newimagename)

        oldname = os.path.join(FileDir, image)
        newname = os.path.join(FileDir, newimagename)
        # os.rename silently replaces an existing target on POSIX; refuse
        # to destroy another image (renaming a file onto itself is fine).
        if (os.path.normcase(oldname) != os.path.normcase(newname)
                and os.path.exists(newname)):
            raise OSError("refusing to overwrite existing file: " + newname)
        os.rename(oldname, newname)
def parse_args(argv=None):
    """Parse the command-line arguments of the rename script.

    :param argv: optional list of argument strings.  ``None`` (the
        default) makes argparse read ``sys.argv[1:]``, so existing
        ``parse_args()`` calls behave as before.
    :returns: ``argparse.Namespace`` with ``dataset`` (str) and
        ``countnum`` (int, 0 when ``-count`` is not given).
    """
    parser = argparse.ArgumentParser(description='Rename demo')
    # Required: without it the script would pass None on as the directory.
    parser.add_argument('-set', dest='dataset', required=True,
                        help='choose dataset')
    parser.add_argument('-count', dest='countnum', default=0, type=int,
                        help='start number for the renamed files; the '
                             'script refuses the default 0')
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: rename only when a non-zero start number was
    # given on the command line, otherwise just remind the user.
    args = parse_args()
    if args.countnum != 0:
        RenameFiles(args.dataset, args.countnum)
    else:
        print("please change countnum")
圖像標定
這里采用的是labelImg的標定工具，發動同學和學姐學弟一起幫我標定2333~~~
圖像分類
這里我們需要產生和PascalVOC一樣格式的txt文件，這里筆者事先研究過了，產生是有點規律的，具體見我的上一篇博客:折騰faster-rcnn(二)--數據篇
然后是幾個集合的比例問題:
一般情況下trainval占總數據集的50%，test占總數據集的50%；train占trainval的50%，val占trainval的50%。上面所占百分比可根據自己的數據集修改，如果數據集比較少，test和val可少一些。這個在生成以上四個文件的腳本里面可以調比例，暫時就用默認比例試試，跑跑。生成這幾個文件的腳本如下:
# -*-coding:utf-8-*-
import os
import random
import FileOperate as fo
'''
Generate txt files in directory VOC2012/ImageSet
'''
def _random_split(items, fraction):
    """Randomly split *items* into (picked, rest), keeping the input order.

    ``int(len(items) * fraction)`` elements are picked; ``random.sample``
    raises ValueError when that count is negative or larger than the list.
    """
    picked = set(random.sample(range(len(items)), int(len(items) * fraction)))
    chosen = [item for pos, item in enumerate(items) if pos in picked]
    rest = [item for pos, item in enumerate(items) if pos not in picked]
    return chosen, rest


def GenerateSet(abspath, imgsetdir, trainval_ratio=0.5, train_ratio=0.5,
                classes=('kiss', 'hug', 'hands')):
    """Generate the PascalVOC-style image-set txt files for a custom dataset.

    The images found in ``abspath/imgsetdir`` are split at random into
    trainval/test, and trainval is split again into train/val.  Four set
    files (trainval.txt, train.txt, val.txt, test.txt) plus one
    ``<class>_<set>.txt`` file per class and set are written through
    ``fo.createtext``.

    :param abspath: dataset root directory.
    :param imgsetdir: sub-directory of *abspath* holding the images.
    :param trainval_ratio: fraction of all images that go to trainval
        (the remainder becomes test).
    :param train_ratio: fraction of trainval that goes to train
        (the remainder becomes val).
    :param classes: class names; the class of an image is the part of its
        name before the first underscore.
    :raises ValueError: if a ratio is outside the range random.sample accepts.
    """
    txtset = ['trainval.txt', 'train.txt', 'val.txt', 'test.txt']

    # Image identifiers are the file names without their extension.
    # Assumes fo.readfile returns an iterable of file names -- confirm
    # against FileOperate.
    imgset = [os.path.splitext(imgfile)[0]
              for imgfile in fo.readfile(os.path.join(abspath, imgsetdir))]

    trainvalset, testset = _random_split(imgset, trainval_ratio)
    trainset, valset = _random_split(trainvalset, train_ratio)
    subsets = [trainvalset, trainset, valset, testset]

    # Plain set files.  The flag values (2, 1, -1, 0) are passed to
    # fo.createtext exactly as the original script did; presumably 2 means
    # "names only" and the others are the label written next to the name.
    for txtname, subset in zip(txtset, subsets):
        fo.createtext(txtname, subset, 2)

    # Per-class files for trainval/train/val: 1 for the image's own class,
    # -1 for every other class, one call per image and class as before.
    for txtname, subset in zip(txtset[:3], subsets[:3]):
        for imgname in subset:
            clsname = imgname.split("_")[0]
            # NOTE(review): as in the original if/elif/else chain, a prefix
            # that matches no class falls through to the LAST class.
            positive = clsname if clsname in classes else classes[-1]
            for cls in classes:
                label = 1 if cls == positive else -1
                fo.createtext(cls + "_" + txtname, [imgname], label)

    # Test images are written with flag 0 for every class.
    for imgname in testset:
        for cls in classes:
            fo.createtext(cls + "_" + txtset[3], [imgname], 0)
if __name__ == "__main__":
    # Run from the dataset root: images are read from ./JPEGImages.
    GenerateSet(os.path.abspath('.'), 'JPEGImages')
建立圖像庫
將以上幾步搞好后，我們需要仿照PASCALVOC2007的結構建立自己的圖像庫，目錄結構如下圖所示
VOCdevkit2007
|-results
    |-VOC2007
        |-Main #空目錄，用來存放test集結果
|-VOC2007
    |-Annotations #標定文件xml
    |-ImageSets #txt文件存放至此
    |-JPEGImages #jpg圖像存放至此
修改模型
這一步肯定是需要的，因為PascalVOC有20類，所以我們需要根據自己的需求稍微修改神經網絡參數。所有參數都在`/models`下，分為`coco`和`pascal_voc`格式，每種格式又有兩到三種網絡模型，每種模型還有兩種訓練方式`Alternative training(alt-opt)`和`Approximate joint training(end-to-end)`。總之，由于各種原因，本文以Alternative training+ZF model為例。筆者的需求是四類，還有一類在這里叫`__background__`類，一共五類。
要修改的文件:
1.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_train.pt
# Python input layer (RoIDataLayer) emitting 'data', 'im_info' and 'gt_boxes'.
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change per training set: number of object classes + 1 (background)
}
}
2.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt
# Python input layer (RoIDataLayer) emitting the image data, RoIs, labels and box targets/weights.
layer {
name: 'data'
type: 'Python'
top: 'data'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change per training set: number of object classes + 1 (background)
}
}
# Classification head: InnerProduct layer on 'fc7' with one output per class.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 5 # change per training set: number of object classes + 1 (background)
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Box-regression head: InnerProduct layer on 'fc7' with four outputs per class.
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 20 # change per training set: (number of object classes + 1) * 4, i.e. four box coordinates per class
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
3.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_rpn_train.pt
# Python input layer (RoIDataLayer) emitting 'data', 'im_info' and 'gt_boxes'.
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change per training set: number of object classes + 1 (background)
}
}
4.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt
# Python input layer (RoIDataLayer) emitting the image data, RoIs, labels and box targets/weights.
layer {
name: 'data'
type: 'Python'
top: 'data'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 5" # change per training set: number of object classes + 1 (background)
}
}
# Classification head: InnerProduct layer on 'fc7' with one output per class.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 5 # change per training set: number of object classes + 1 (background)
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Box-regression head: InnerProduct layer on 'fc7' with four outputs per class.
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param {
lr_mult: 1.0
}
param {
lr_mult: 2.0
}
inner_product_param {
num_output: 20 # change per training set: (number of object classes + 1) * 4, i.e. four box coordinates per class
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
5.py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
# Test-time classification head: InnerProduct layer on 'fc7' with one output per class.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
inner_product_param {
num_output: 5 # change per training set: number of object classes + 1 (background)
}
}
# Test-time box-regression head: InnerProduct layer on 'fc7' with four outputs per class.
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
inner_product_param {
num_output: 20 # change per training set: (number of object classes + 1) * 4
}
}
注意，注意，這里文件夾進行了切換，并不屬于模型的參數而是屬于支持庫的改動:
6.py-faster-rcnn/lib/datasets/pascal_voc.py
class pascal_voc(imdb):
def __init__(self, image_set, year, devkit_path=None):
imdb.__init__(self, 'voc_' + year + '_' + image_set)
self._year = year
self._image_set = image_set
self._devkit_path = self._get_default_path() if devkit_path is None \
else devkit_path
self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
self._classes = ('__background__', # always index 0
'kiss', 'hug', 'hands','person')# 有幾個(gè)類別此處就寫(xiě)幾個(gè)蹬挺,注意第一個(gè)(索引0位置)永遠(yuǎn)是__background__類
7.py-faster-rcnn/lib/datasets/imdb.py
def append_flipped_images(self):
    """Append a horizontally mirrored copy of every current roidb entry.

    For each of the ``self.num_images`` images, the x coordinates
    (columns 0 and 2 of ``boxes``) are mirrored around the image width,
    and the new entry is appended to ``self.roidb`` with
    ``'flipped': True``.  ``self._image_index`` is doubled so it covers
    the appended entries as well.
    """
    total = self.num_images
    # Only the width (size[0]) of each image is needed to mirror boxes.
    image_widths = []
    for idx in xrange(total):
        image_widths.append(PIL.Image.open(self.image_path_at(idx)).size[0])

    # Iterate by index over the ORIGINAL count: roidb grows inside the loop.
    for idx in xrange(total):
        width = image_widths[idx]
        mirrored = self.roidb[idx]['boxes'].copy()
        x_left = mirrored[:, 0].copy()
        x_right = mirrored[:, 2].copy()
        # The old right edge becomes the new left edge and vice versa.
        mirrored[:, 0] = width - x_right - 1
        mirrored[:, 2] = width - x_left - 1
        assert (mirrored[:, 2] >= mirrored[:, 0]).all()
        flipped_entry = {
            'boxes': mirrored,
            'gt_overlaps': self.roidb[idx]['gt_overlaps'],
            'gt_classes': self.roidb[idx]['gt_classes'],
            'flipped': True,
        }
        self.roidb.append(flipped_entry)

    self._image_index = self._image_index * 2
8.根目錄執行`./experiments/scripts/faster_rcnn_alt_opt.sh 0 ZF pascal_voc`，訓練和測試一氣呵成，并且還有日志記錄。如果只需要訓練，那么執行(無日志記錄):
./tools/train_faster_rcnn_alt_opt.py --gpu 0 --net_name ZF --weights data/imagenet_models/ZF.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_alt_opt.yml
Q&A:
1.如果標簽含有大寫字母，那么修改py-faster-rcnn/lib/datasets/pascal_voc.py第209行:
cls = self._class_to_ind[obj.find('name').text.lower().strip()]
2.出現錯誤:
TypeError: 'NoneType' object has no attribute '__getitem__'
這是因為某些圖像根本不是jpeg圖像，只是后綴被改動了，這一下被學姐坑慘了。建議大家訓練前統統用圖轉工具將圖像轉換一下格式，以絕后患。