前言
目前基于深度學習的目標檢測越來越火,其準確度很高。筆者采用Yolo-v3實現目標檢測。Yolo-v3基于darknet框架,該框架采用純C語言,不依賴其他第三方庫,相對于caffe框架在易用性上對開發者更友好(筆者編譯過數次caffe才成功)。本文基于windows平臺將yolo-v3編譯為動態鏈接庫dll,并測試其檢測性能。
New, python接口的YOLO-v3, !!!, 走過不要錯過
為了方便測試,本人將測試通過的Visual Studio工程貼出來。
Yolov3-windows測試工程
鏈接:https://pan.baidu.com/s/1i6ZK2ZCGzWbfWT1-_fUTzg
提取碼:rj9o
復制這段內容后打開百度網盤手機App,操作更方便哦
開發(fā)環(huán)境
- windows 10 x64
- Visual Studio 2017
- opencv3.4.0
- darknet 筆者直接fork自AlexeyAB/darknet
動態鏈接庫.dll的編譯過程就不再贅述,相信熟悉C++、編譯過opencv的小伙伴都很容易完成。本文測試使用的是cpu-only版本;筆者編譯過cpu-only、gpu兩個版本的yolo-v3 dll,需要dll的請點贊支持哦。
Yolo-v3
-
代碼 C++,opencv
需要的文件可以在darknet鏈接下載得到:
- yolov3.cfg,yolov3的網絡結構描述文件
- yolov3.weights,yolov3訓練好的權重文件,在coco數據集上訓練的
- coco.names,coco數據集的目標類別文件
#include<iostream>
#include<opencv2/opencv.hpp>
#include<yolo\include\yolo_v2_class.hpp>
using namespace std;
using namespace cv;
const string CFG_FILE = "darknet-master\\cfg\\yolov3.cfg";
const string WEIGHT_FILE = "yolov3.weights";
const string COCO_NAMES = "darknet-master\\cfg\\coco.names";
class Object
{
public:
Object();
Object::Object(int id, float confidence, Rect rect, String name);
~Object();
public:
int id;
float confidence;
Rect rect;
String name;
private:
};
Object::Object() {
}
Object::Object(int id,float confidence,Rect rect,String name) {
this->id = id;
this->confidence = confidence;
this->rect = rect;
this->name = name;
}
Object::~Object() {
}
int main() {
//--------------------------實例化一個Yolo檢測器---------------------------
Detector yolo_detector(CFG_FILE, WEIGHT_FILE);
//讀取目標(biāo)類別文件,80類
vector<String> classNames;
ifstream fileIn(COCO_NAMES, ios::in);
if (!fileIn.is_open()) {
cerr << "failed to load COCO.names!" << endl;
return -1;
}
for (int i = 0; i < 80; i++) {
char temp1[100];
fileIn.getline(temp1, 100);
string temp2(temp1);
classNames.push_back(String(temp2));
}
//---------------------------加載輸入圖像-----------------------------------
auto image = Detector::load_image("7.jpg");
cout << "圖像寬度=" << image.w << endl
<< "圖像高度=" << image.h << endl
<< "圖像通道=" << image.c << endl;
//-----------------------------目標(biāo)檢測---------------------------------------
TickMeter t;
t.start();
auto res = yolo_detector.detect(image);
t.stop();
cout << "YOLO-v3檢測時間=" << t.getTimeSec() << "sec" << endl;
//----------------------------解析檢測結(jié)果---------------------------------------
vector<Object> detectObjects;
for (auto& i:res) {
int id = i.obj_id;
float confidence = i.prob;
String name = classNames[id];
Rect rect = Rect{ static_cast<int>(i.x),static_cast<int>(i.y),static_cast<int>(i.w),static_cast<int>(i.h) };
detectObjects.push_back(Object{ id,confidence,rect,name });
}
//----------------------------繪制結(jié)果---------------------------------------------
Mat im_src = imread("7.jpg");
for (auto& i:detectObjects) {
rectangle(im_src, i.rect, Scalar(0, 255, 255), 2);
putText(im_src, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0),2);
}
imshow("yolo-v3", im_src);
waitKey(0);
}
-
API介紹
yolo_v2_class.hpp
中定義了Detector,將yolo封裝到C++類中,方便使用。下面是Detector類的源碼。
// Detector wraps the YOLO network behind a C++ class. Members marked
// YOLODLL_API are only declared in this listing; their definitions are not
// shown here, so their exact behaviour has to be taken from the library.
class Detector {
// Type-erased handle to the detector's internal state (what it points to is not visible here).
std::shared_ptr<void> detector_gpu_ptr;
// History of previous detections; presumably consumed by tracking_id() - confirm in the library source.
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
const int cur_gpu_id;
public:
// NOTE(review): looks like the non-maximum-suppression threshold - confirm against the implementation.
float nms = .4;
bool wait_stream;
// Builds a detector from a network description file and a weights file; gpu_id defaults to 0.
YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
YOLODLL_API ~Detector();
// Detection on an image file given by name; thresh defaults to 0.2.
YOLODLL_API std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
// Detection on an already loaded image_t; thresh defaults to 0.2.
YOLODLL_API std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
// Loads an image file into an image_t.
static YOLODLL_API image_t load_image(std::string image_filename);
// Counterpart of load_image for releasing an image_t (also used by the deleter in mat_to_image below).
static YOLODLL_API void free_image(image_t m);
YOLODLL_API int get_net_width() const;
YOLODLL_API int get_net_height() const;
YOLODLL_API std::vector<bbox_t> tracking_id(std::vector<bbox_t> cur_bbox_vec, bool const change_history = true,
int const frames_story = 10, int const max_dist = 150);
// Runs detect() on img and rescales every box from img's size to an
// init_w x init_h image. Throws std::runtime_error when img has no data.
std::vector<bbox_t> detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false)
{
if (img.data == NULL)
throw std::runtime_error("Image is empty");
auto detection_boxes = detect(img, thresh, use_mean);
// Horizontal and vertical scale factors from the detection image to the original size.
float wk = (float)init_w / img.w, hk = (float)init_h / img.h;
for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;
return detection_boxes;
}
#ifdef OPENCV
// Detection on a cv::Mat: the image is resized to the network input size,
// detected, and the boxes are mapped back to the Mat's own cols x rows.
// Throws std::runtime_error when mat has no data.
std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)
{
if(mat.data == NULL)
throw std::runtime_error("Image is empty");
auto image_ptr = mat_to_image_resize(mat);
return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean);
}
// Resizes mat to get_net_width() x get_net_height() and converts it to an
// image_t; returns a null pointer when mat has no data.
std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const
{
if (mat.data == NULL) return std::shared_ptr<image_t>(NULL);
cv::Mat det_mat;
cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
return mat_to_image(det_mat);
}
// Converts a cv::Mat to an image_t after swapping the red and blue channels.
// The returned shared_ptr's deleter calls free_image on the pixels and then
// deletes the image_t itself.
static std::shared_ptr<image_t> mat_to_image(cv::Mat img_src)
{
cv::Mat img;
cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR);
std::shared_ptr<image_t> image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; });
std::shared_ptr<IplImage> ipl_small = std::make_shared<IplImage>(img);
*image_ptr = ipl_to_image(ipl_small.get());
return image_ptr;
}
private:
// Copies the IplImage's interleaved unsigned-char pixels into a newly
// allocated float buffer, one full channel after another, dividing each
// value by 255.
static image_t ipl_to_image(IplImage* src)
{
unsigned char *data = (unsigned char *)src->imageData;
int h = src->height;
int w = src->width;
int c = src->nChannels;
// widthStep is used as the offset between consecutive rows of the source buffer.
int step = src->widthStep;
image_t out = make_image_custom(w, h, c);
int count = 0;
for (int k = 0; k < c; ++k) {
for (int i = 0; i < h; ++i) {
int i_step = i*step;
for (int j = 0; j < w; ++j) {
out.data[count++] = data[i_step + j*c + k] / 255.;
}
}
}
return out;
}
// Returns an image_t with the given dimensions and no pixel buffer (data is 0).
static image_t make_empty_image(int w, int h, int c)
{
image_t out;
out.data = 0;
out.h = h;
out.w = w;
out.c = c;
return out;
}
// Returns an image_t whose buffer holds h*w*c floats allocated with calloc (zero-filled).
static image_t make_image_custom(int w, int h, int c)
{
image_t out = make_empty_image(w, h, c);
out.data = (float *)calloc(h*w*c, sizeof(float));
return out;
}
#endif // OPENCV
};
- 主要的方法:
- 構(gòu)造方法
YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
輸入:配置文件(.cfg)、權重文件(.weights);gpu_id表示使用哪個GPU
- 加載輸入圖像
static YOLODLL_API image_t load_image(std::string image_filename);
輸入:圖像名稱
此方法為靜態方法,將二維圖像轉為張量(Tensor)。
-
測試圖像 均來自百度圖片
- 動物,人
- 車輛,人
測試結(jié)果
-
動物,人的檢測
-
車輛,行人的檢測
從以上測試結果來看,yolo-v3在準確度上性能非凡,較小尺寸的目標也可以檢測到。相對于MobileNet-SSD(v1版本),準確度更好。
Yolo-v3封裝為python接口
yolo原始接口采用C語言,對于不熟悉C/C++的同學不友好;其次C語言每次都要編譯,相對麻煩。現在深度學習采用python才是標配,因此本人利用pybind11將其封裝為python API。
首先需要配置pybind11, 見文章:pybind11使用
工程配置
pybind11封裝接口
python_api.cpp
#include<pybind11/pybind11.h>
#include<pybind11/stl.h>
#include<pybind11/numpy.h>
#include<string>
#include<vector>
#include<opencv2/opencv.hpp>
#include<yolo/include/yolo_v2_class.hpp>
using namespace cv;
namespace py = pybind11;
/// Detection result exposed to Python: class id, confidence score,
/// bounding box and class name.
class Object
{
public:
    /// Creates an empty detection with id 0, confidence 0, an empty rect and an empty name.
    Object();
    /// @param id          class index reported by the detector
    /// @param confidence  score reported by the detector for this detection
    /// @param rect        bounding box as [xmin, ymin, xmax, ymax]
    /// @param name        class name associated with @p id
    Object(int id, float confidence, std::vector<int> rect, std::string name);
    ~Object();
public:
    int id;
    float confidence;
    std::vector<int> rect; // [xmin, ymin, xmax, ymax]
    std::string name;
};

// Zero the scalar members so a default-constructed Object never exposes
// indeterminate values.
Object::Object() : id(0), confidence(0.0f) {
}

Object::Object(int id, float confidence, std::vector<int> rect, std::string name)
    : id(id), confidence(confidence), rect(rect), name(name) {
}

Object::~Object() {
}
class YoloDetector : public Detector {
public:
std::string weights_file;
std::string cfg_file;
private:
std::vector<std::string> classNames;
image_t cvMat_to_image_t(cv::Mat& image) {
image_t dst;
dst.w = image.cols;
dst.h = image.rows;
dst.c = image.channels();
dst.data = new float[dst.w*dst.h*dst.c * sizeof(float)];
int count = 0;
for (int i = 0; i < image.rows; i++)
{
for (int j = 0; j < image.cols; j++)
{
cv::Vec3b pixel = image.at<Vec3b>(i, j);
dst.data[count] = (float)pixel[0];
dst.data[count+1] = (float)pixel[1];
dst.data[count+2] = (float)pixel[2];
count += 3;
}
}
return dst;
}
public:
YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {
this->weights_file = weights_file;
this->cfg_file = cfg_file;
};
~YoloDetector() {};
public:
void setCOCOName(std::vector<std::string> names) {
for (auto i: names)
{
this->classNames.push_back(i);
}
}
//// image: BGR Format
//std::vector<Object> detectImage(cv::Mat& image) {
// Mat rgb_image;
// cvtColor(image, rgb_image, COLOR_BGR2RGB);
// cv::resize(rgb_image, rgb_image, cv::Size(this->get_net_width(), this->get_net_height()));
// image_t image_ = this->cvMat_to_image_t(rgb_image);
//
// auto res = this->detect(image_);
// std::vector<Object> objs;
// for (auto i:res)
// {
// objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
// }
// return objs;
//}
// image: BGR Format
std::vector<Object> detectImage(std::string image_name) {
auto res = this->detect(Detector::load_image(image_name));
std::vector<Object> objs;
for (auto i : res)
{
objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
}
return objs;
}
};
#if 0
/// Stand-alone smoke test for YoloDetector: detects objects in a fixed image
/// file and shows the image with the boxes drawn on it.
/// @return 0 on success, -1 when the class-name file or the image cannot be read.
int main() {
    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";
    const std::string IMAGE_FILE = "D:\\YOLO-v3\\darknet-test.jpg";

    // Read the class-name file, one name per line, until EOF rather than
    // assuming exactly 80 lines of at most 99 characters.
    std::vector<std::string> classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(fileIn, line)) {
        // Drop a trailing carriage return left over from CRLF line endings.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classNames.push_back(line);
    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    cv::Mat image = cv::imread(IMAGE_FILE);
    if (image.empty()) {
        std::cerr << "failed to load image!" << std::endl;
        return -1;
    }
    auto detectObjects = detector.detectImage(IMAGE_FILE);
    for (const auto& obj : detectObjects) {
        // rect is [xmin, ymin, xmax, ymax]; cv::Rect wants x, y, width, height.
        cv::rectangle(image,
            Rect(obj.rect[0], obj.rect[1], obj.rect[2] - obj.rect[0], obj.rect[3] - obj.rect[1]),
            Scalar(0, 255, 255), 2);
    }
    imshow("yolo", image);
    waitKey(0);
    return 0;
}
#endif
#if 1
// Python module "yolov3": exposes the Object result type and the YoloDetector class.
PYBIND11_MODULE(yolov3, m) {
    // Object: constructible from (id, confidence, rect, name); all fields read/write.
    py::class_<Object> objectBinding(m, "Object");
    objectBinding.def(py::init<int, float, std::vector<int>, std::string>());
    objectBinding.def_readwrite("id", &Object::id);
    objectBinding.def_readwrite("confidence", &Object::confidence);
    objectBinding.def_readwrite("rect", &Object::rect);
    objectBinding.def_readwrite("name", &Object::name);

    // YoloDetector: constructed from two strings, as declared by its C++ constructor.
    py::class_<YoloDetector> detectorBinding(m, "YoloDetector");
    detectorBinding.def(py::init<std::string, std::string>());
    detectorBinding.def("detectImage", &YoloDetector::detectImage);
    detectorBinding.def("setCOCOName", &YoloDetector::setCOCOName);
}
#endif
生成python可以調(diào)用的動態(tài)庫
在pycharm中調(diào)用
new一個工程, 在工程目錄下new一個 package
python代碼
# Demo script: run the pybind11-wrapped YOLO-v3 detector on one image file
# and display the image with the detected boxes and class names.
import demo18.yolov3 as yolov3
import cv2

detector = yolov3.YoloDetector('D:\\YOLO-v3\\yolov3.weights', 'D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg')
help(detector)

# One class name per line; the line index is the class id.
with open('D:\\YOLO-v3\\darknet-master\\cfg\\coco.names', 'r') as f:
    COCOName = [line.rstrip() for line in f]
detector.setCOCOName(COCOName)

out = detector.detectImage('D:\\YOLO-v3\\darknet-test.jpg')
image = cv2.imread('D:\\YOLO-v3\\darknet-test.jpg')

for detection in out:
    # rect is [xmin, ymin, xmax, ymax]
    xmin, ymin, xmax, ymax = detection.rect[0], detection.rect[1], detection.rect[2], detection.rect[3]
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 255))
    cv2.putText(image, detection.name, (xmin, ymin), 1, 1, (0, 0, 255))

cv2.imshow('yolo', image)
cv2.waitKey(0)
結(jié)果
上面的python接口只支持輸入文件,只能測試單張圖像。對于視頻目標檢測行不通,因此,在此基礎上繼續封裝接口。
主要實現了2個方法:
- detectFromFile() 輸入圖像文件
- detectImage() 輸入numpy.ndarray對象, BGR格式
C++代碼
#if 1
#include<pybind11/pybind11.h>
#include<pybind11/stl.h>
#include<pybind11/numpy.h>
#include<string>
#include<vector>
#include<opencv2/opencv.hpp>
#include<yolo/include/yolo_v2_class.hpp>
#include"ndarray_converter.h"
using namespace cv;
namespace py = pybind11;
/// Converts a BGR image to grayscale with cv::cvtColor and returns the result.
cv::Mat testCV(cv::Mat& img_bgr) {
    cv::Mat gray;
    cv::cvtColor(img_bgr, gray, cv::COLOR_BGR2GRAY);
    return gray;
}
/// Converts an 8-bit cv::Mat into a float image_t scaled to [0, 1].
/// 3-channel input is read as BGR and written as R, G, B per pixel;
/// 1-channel input is copied as is.
/// The pixel buffer is allocated with new[]; the caller owns it and must delete[] it.
/// @throws std::runtime_error when the Mat is not 8-bit with 1 or 3 channels.
// NOTE(review): the output keeps the channels interleaved per pixel, whereas
// cv_mat_to_image_t2 in this file stores one full channel after another -
// confirm which layout the detector expects before using this function.
image_t cv_mat_to_image_t(cv::Mat& image) {
    const int channels = image.channels();
    if (image.depth() != CV_8U || (channels != 1 && channels != 3))
    {
        throw std::runtime_error("Image must be 8-bit with 1 or 3 channels");
    }

    // operator new[] reports failure by throwing std::bad_alloc, so no null check is needed.
    float* data = new float[static_cast<size_t>(image.rows) * image.cols * channels];
    size_t cnt = 0;
    for (int i = 0; i < image.rows; i++)
    {
        for (int j = 0; j < image.cols; j++)
        {
            if (channels == 3)
            {
                const cv::Vec3b& bgr = image.at<cv::Vec3b>(i, j);
                data[cnt] = bgr[2] / 255.0f;     // r
                data[cnt + 1] = bgr[1] / 255.0f; // g
                data[cnt + 2] = bgr[0] / 255.0f; // b
                cnt += 3;
            }
            else
            {
                data[cnt] = static_cast<float>(image.at<uchar>(i, j)) / 255.0f;
                cnt += 1;
            }
        }
    }

    image_t imaget;
    imaget.c = channels;
    imaget.h = image.rows;
    imaget.w = image.cols;
    imaget.data = data;
    // The original fell off the end of this non-void function (undefined behaviour).
    return imaget;
}
/// Builds an image_t describing a w x h image with c channels and no pixel
/// buffer attached (data is null).
image_t make_empty_image(int w, int h, int c)
{
    image_t header;
    header.w = w;
    header.h = h;
    header.c = c;
    header.data = nullptr;
    return header;
}
/// Builds an image_t of the given size with a buffer of w*h*c floats
/// allocated with new[]; the buffer contents are left uninitialized.
image_t make_image(int w, int h, int c)
{
    image_t img = make_empty_image(w, h, c);
    const int elementCount = w * h * c;
    img.data = new float[elementCount];
    return img;
}
/// Converts an 8-bit BGR cv::Mat into a float image_t: channels are reordered
/// to RGB, values are divided by 255 and the result is stored one full channel
/// after another (all of channel 0, then channel 1, ...).
/// The buffer comes from make_image (new[]); the caller owns it.
/// @throws std::runtime_error when the Mat is empty or not 8-bit.
image_t cv_mat_to_image_t2(cv::Mat& image) {
    // Validate before calling cvtColor, which rejects empty input on its own terms.
    if (image.empty())
        throw std::runtime_error("Image is empty");
    if (image.depth() != CV_8U)
        throw std::runtime_error("Image must have 8-bit channels");

    // BGR->RGB conversion needs 3 or 4 input channels; other layouts
    // (e.g. grayscale) are passed through unchanged.
    cv::Mat dst;
    if (image.channels() == 3 || image.channels() == 4)
        cv::cvtColor(image, dst, cv::COLOR_BGR2RGB);
    else
        dst = image;

    // Take the geometry from the image actually being read: the converted
    // image's channel count need not match the input's.
    const int w = dst.cols;
    const int h = dst.rows;
    const int c = dst.channels();

    image_t im = make_image(w, h, c);
    for (int k = 0; k < c; ++k) {
        for (int j = 0; j < h; ++j) {
            // Row pointer instead of a flat index, so padded or non-contiguous rows are read correctly.
            const unsigned char* row = dst.ptr<unsigned char>(j);
            for (int i = 0; i < w; ++i) {
                const int dst_index = i + w * j + w * h * k;
                im.data[dst_index] = static_cast<float>(row[c * i + k]) / 255.;
            }
        }
    }
    return im;
}
/// Detection result exposed to Python: class id, confidence score,
/// bounding box and class name.
class Object
{
public:
    /// Creates an empty detection with id 0, confidence 0, an empty rect and an empty name.
    Object();
    /// @param id          class index reported by the detector
    /// @param confidence  score reported by the detector for this detection
    /// @param rect        bounding box as [xmin, ymin, xmax, ymax]
    /// @param name        class name associated with @p id
    Object(int id, float confidence, std::vector<int> rect, std::string name);
    ~Object();
public:
    int id;
    float confidence;
    std::vector<int> rect; // [xmin, ymin, xmax, ymax]
    std::string name;
};

// Zero the scalar members so a default-constructed Object never exposes
// indeterminate values.
Object::Object() : id(0), confidence(0.0f) {
}

Object::Object(int id, float confidence, std::vector<int> rect, std::string name)
    : id(id), confidence(confidence), rect(rect), name(name) {
}

Object::~Object() {
}
class YoloDetector : public Detector {
public:
std::string weights_file;
std::string cfg_file;
private:
std::vector<std::string> classNames;
public:
YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {
this->weights_file = weights_file;
this->cfg_file = cfg_file;
};
~YoloDetector() {};
public:
void setCOCOName(std::vector<std::string> names) {
for (auto i: names)
{
this->classNames.push_back(i);
}
}
// image: BGR Format
std::vector<Object> detectFromFile(std::string image_name) {
auto res = this->detect(Detector::load_image(image_name));
std::vector<Object> objs;
for (auto i : res)
{
objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
}
return objs;
}
/*
overload
*/
std::vector<Object> detectImage(cv::Mat& image) {
/*float* data = new float[image.rows*image.cols*image.channels()];
if (data==nullptr)
{
std::runtime_error("failed to malloc men!");
}
int cnt = 0;
for (int i = 0; i < image.rows; i++)
{
for (int j = 0; j < image.cols; j++)
{
if (image.channels()==3)
{
float r = image.at<Vec3b>(i, j)[2] / 255.0f;
float g = image.at<Vec3b>(i, j)[1] / 255.0f;
float b = image.at<Vec3b>(i, j)[0] / 255.0f;
data[cnt] = r;
data[cnt+1] = g;
data[cnt+2] = b;
cnt += 3;
}
else
{
data[cnt] = static_cast<float>(image.at<uchar>(i, j)) / 255.0f;
cnt += 1;
}
}
}
image_t imaget;
imaget.c = image.channels();
imaget.h = image.rows;
imaget.w = image.cols;
imaget.data = data;
std::cout << "yolo: image input ok!" << std::endl;
std::cout << "yolo: start to detect" << std::endl;*/
auto imaget = cv_mat_to_image_t2(image);
auto res = this->detect(imaget);
std::cout << "yolo:finish to detect" << std::endl;
std::vector<Object> objs;
for (auto i : res)
{
objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
}
return objs;
}
};
#if 1
/// Stand-alone smoke test for YoloDetector::detectImage: detects objects in an
/// image loaded with cv::imread and shows it with the boxes drawn on it.
/// @return 0 on success, -1 when the class-name file or the image cannot be read.
int main() {
    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";

    // Read the class-name file, one name per line, until EOF rather than
    // assuming exactly 80 lines of at most 99 characters.
    std::vector<std::string> classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(fileIn, line)) {
        // Drop a trailing carriage return left over from CRLF line endings.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classNames.push_back(line);
    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    cv::Mat image = cv::imread("D:\\YOLO-v3\\darknet-test.jpg");
    if (image.empty()) {
        std::cerr << "failed to load image!" << std::endl;
        return -1;
    }
    //auto detectObjects = detector.detectFromFile("D:\\YOLO-v3\\darknet-test.jpg");
    auto detectObjects = detector.detectImage(image);
    for (const auto& obj : detectObjects) {
        // rect is [xmin, ymin, xmax, ymax]; cv::Rect wants x, y, width, height.
        cv::rectangle(image,
            Rect(obj.rect[0], obj.rect[1], obj.rect[2] - obj.rect[0], obj.rect[3] - obj.rect[1]),
            Scalar(0, 255, 255), 2);
    }
    imshow("yolo", image);
    waitKey(0);
    return 0;
}
#endif
//.def("detectImage", py::overload_cast<cv::Mat>(&YoloDetector::detectImage))
//.def("detectImage", py::overload_cast<std::string>(&YoloDetector::detectImage))
#if 0
// Python module "yolov3": exposes Object, the test_cv helper and YoloDetector.
PYBIND11_MODULE(yolov3, m) {
    NDArrayConverter::init_numpy();

    // Object: constructible from (id, confidence, rect, name); all fields read/write.
    py::class_<Object> objectBinding(m, "Object");
    objectBinding.def(py::init<int, float, std::vector<int>, std::string>());
    objectBinding.def_readwrite("id", &Object::id);
    objectBinding.def_readwrite("confidence", &Object::confidence);
    objectBinding.def_readwrite("rect", &Object::rect);
    objectBinding.def_readwrite("name", &Object::name);

    m.def("test_cv", &testCV, py::arg("image_bgr"));

    // YoloDetector: file-based and in-memory detection entry points.
    py::class_<YoloDetector> detectorBinding(m, "YoloDetector");
    detectorBinding.def(py::init<std::string, std::string>());
    detectorBinding.def("detectFromFile", &YoloDetector::detectFromFile, py::arg("image_file"));
    detectorBinding.def("detectImage", &YoloDetector::detectImage, py::arg("image_bgr"));
    detectorBinding.def("setCOCOName", &YoloDetector::setCOCOName);
}
#endif
#endif // 0
結(jié)果
End
本文主要實現了windows平臺下yolo-v3的快速測試使用。關于yolo網絡結構的設計、yolo模型的訓練,下期再詳細介紹。感謝甜心的大力支持。