This work is based on the project pierluigiferrari/ssd_keras.
1. Dataset annotation
a. Use VoTT to label the images and export the object-detection dataset in Pascal VOC format.
2. Model training
Load the VOC-format dataset using the method from https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd300_training.ipynb.
Train a shallow model with https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd7_training.ipynb; note that several layers of this model are not supported when deploying it to ncnn.
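For reference, loading the VOC-format annotations with the repository's DataGenerator looks roughly like the sketch below (based on the training notebooks; the directory paths and the class list with my single 'thanks' class are placeholders, so adjust them to your own VoTT export):

from data_generator.object_detection_2d_data_generator import DataGenerator

# Placeholder paths for the VoTT export; adjust to your own layout.
images_dir = 'dataset/JPEGImages'
annotations_dir = 'dataset/Annotations'
image_set_filename = 'dataset/ImageSets/Main/train.txt'
classes = ['background', 'thanks']  # index 0 must be the background class

train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
train_dataset.parse_xml(images_dirs=[images_dir],
                        image_set_filenames=[image_set_filename],
                        annotations_dirs=[annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False)
print('Number of training samples:', train_dataset.get_dataset_size())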
Because the dataset produced with VoTT is missing some attributes, loading it raises errors, so the file data_generator/object_detection_2d_data_generator.py needs the following changes:
Change difficult = int(obj.difficult.text) to difficult = 0
Change if batch_inverse_transforms: batch_inverse_transforms.pop(j) to
if batch_inverse_transforms and j < len(batch_inverse_transforms): batch_inverse_transforms.pop(j)
I also made some mistakes while labeling the dataset, so I modified the following as well:
for obj in objects:
    print(filename)  # debug output: shows which annotation file is being parsed
    class_name = obj.find('name').text
    # Labels were not entered consistently, so normalize them here:
    # lowercase everything and map the mislabeled 'thanks-1' back to 'thanks'.
    class_name = class_name.lower()
    if class_name == 'thanks-1':
        class_name = 'thanks'
    class_id = self.classes.index(class_name)
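Instead of hard-coding difficult = 0 as above, a slightly more defensive variant (my own suggestion, assuming obj is the BeautifulSoup node used inside parse_xml) keeps the original value whenever VoTT did write the tag:

# Fall back to 0 only when the <difficult> tag is missing from the annotation.
difficult_tag = obj.find('difficult')
difficult = int(difficult_tag.text) if difficult_tag is not None else 0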
3. Deploy to mobile with TensorFlow Lite
a. The TensorFlow Lite iOS Demo App, which uses a quantized model for real-time prediction
To run the demo above with a non-quantized (float) model, make the following changes:
// If you have your own model, modify this to the file name, and make sure
// you've added the file to your app resources too.
//static NSString* model_file_name = @"mobilenet_quant_v1_224";
static NSString* model_file_name = @"mobilenet_v1_1.0_224";
static NSString* model_file_type = @"tflite";
// Returns the top N confidence values over threshold in the provided vector,
// sorted by confidence in descending order.
static void GetTopN_float(const float* prediction, const int prediction_size, const int num_results,
                          const float threshold, std::vector<std::pair<float, int>>* top_results) {
  // Will contain top N results in ascending order.
  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
                      std::greater<std::pair<float, int>>>
      top_result_pq;

  const long count = prediction_size;
  for (int i = 0; i < count; ++i) {
    // The float model already outputs probabilities, so unlike the quantized
    // path there is no need to divide by 255.0 here.
    const float value = prediction[i];
    // Only add it if it beats the threshold and has a chance at being in
    // the top N.
    if (value < threshold) {
      continue;
    }
    top_result_pq.push(std::pair<float, int>(value, i));
    // If at capacity, kick the smallest value out.
    if (top_result_pq.size() > num_results) {
      top_result_pq.pop();
    }
  }

  // Copy to output vector and reverse into descending order.
  while (!top_result_pq.empty()) {
    top_results->push_back(top_result_pq.top());
    top_result_pq.pop();
  }
  std::reverse(top_results->begin(), top_results->end());
}
// Note: with the default camera settings, the image in pixelBuffer is in landscape orientation.
- (void)runModelOnFrame_float:(CVPixelBufferRef)pixelBuffer {
  assert(pixelBuffer != NULL);

  OSType sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
  assert(sourcePixelFormat == kCVPixelFormatType_32ARGB ||
         sourcePixelFormat == kCVPixelFormatType_32BGRA);

  const int sourceRowBytes = (int)CVPixelBufferGetBytesPerRow(pixelBuffer);
  const int image_width = (int)CVPixelBufferGetWidth(pixelBuffer);
  const int fullHeight = (int)CVPixelBufferGetHeight(pixelBuffer);

  CVPixelBufferLockFlags unlockFlags = kNilOptions;
  CVPixelBufferLockBaseAddress(pixelBuffer, unlockFlags);

  unsigned char* sourceBaseAddr = (unsigned char*)(CVPixelBufferGetBaseAddress(pixelBuffer));
  int image_height;
  unsigned char* sourceStartAddr;
  // Crop the frame to a centered square before feeding it to the network.
  if (fullHeight <= image_width) {
    image_height = fullHeight;
    sourceStartAddr = sourceBaseAddr;
  } else {
    image_height = image_width;
    const int marginY = ((fullHeight - image_width) / 2);
    sourceStartAddr = (sourceBaseAddr + (marginY * sourceRowBytes));
  }
  const int image_channels = 4;
  assert(image_channels >= wanted_input_channels);
  uint8_t* in = sourceStartAddr;

  // Fill the input tensor for the current task.
  // interpreter->inputs()[0] is the classifier's only input tensor; 'input' is an
  // integer index into the interpreter's tensor list. The next statement then
  // 1) uses 'input' to look up the concrete tensor in the TfLiteTensor* content_.tensors table, and
  // 2) returns that tensor's data.raw, the memory block backing the tensor, so the app
  //    can copy the image data to be predicted into 'out'.
  int input = interpreter->inputs()[0];
  float* out = interpreter->typed_tensor<float>(input);

  const float input_mean = 127.5f;
  const float input_std = 127.5f;
  for (int y = 0; y < wanted_input_height; ++y) {
    float* out_row = out + (y * wanted_input_width * wanted_input_channels);
    for (int x = 0; x < wanted_input_width; ++x) {
      const int in_x = (y * image_width) / wanted_input_width;
      const int in_y = (x * image_height) / wanted_input_height;
      uint8_t* in_pixel = in + (in_y * image_width * image_channels) + (in_x * image_channels);
      float* out_pixel = out_row + (x * wanted_input_channels);
      for (int c = 0; c < wanted_input_channels; ++c) {
        // Normalize each channel from [0, 255] to roughly [-1, 1].
        out_pixel[c] = (in_pixel[c] - input_mean) / input_std;
      }
    }
  }

  // Run inference.
  double startTimestamp = [[NSDate new] timeIntervalSince1970];
  if (interpreter->Invoke() != kTfLiteOk) {
    LOG(FATAL) << "Failed to invoke!";
  }
  double endTimestamp = [[NSDate new] timeIntervalSince1970];
  total_latency += (endTimestamp - startTimestamp);
  total_count += 1;
  NSLog(@"Time: %.4lf, avg: %.4lf, count: %d", endTimestamp - startTimestamp,
        total_latency / total_count, total_count);

  const int output_size = 1000;
  const int kNumResults = 5;
  const float kThreshold = 0.1f;
  std::vector<std::pair<float, int>> top_results;

  // Parse the output tensor to get the recognition results.
  // typed_output_tensor returns the memory block backing the output tensor; 'output'
  // is already a flat 1-D array that the results can be read from.
  float* output = interpreter->typed_output_tensor<float>(0);
  // GetTopN_float finds the N largest values in 'output' (first) and their indices (second).
  GetTopN_float(output, output_size, kNumResults, kThreshold, &top_results);

  NSMutableDictionary* newValues = [NSMutableDictionary dictionary];
  for (const auto& result : top_results) {
    const float confidence = result.first;
    const int index = result.second;
    NSString* labelObject = [NSString stringWithUTF8String:labels[index].c_str()];
    NSNumber* valueObject = [NSNumber numberWithFloat:confidence];
    [newValues setObject:valueObject forKey:labelObject];
  }
  dispatch_async(dispatch_get_main_queue(), ^(void) {
    [self setPredictionValues:newValues];
  });
  // The buffer was locked once above, so a single unlock is enough here.
  CVPixelBufferUnlockBaseAddress(pixelBuffer, unlockFlags);
}
Run the non-quantized model to predict images:
cd tensorflow/contrib/lite/examples/ios/simple
pod install
pod update
open tflite_simple_example.xcworkspace
Understanding the demo:
TensorFlow Lite (2/3): tflite files and AI Smart
The app crashes when using the non-quantized model; the fix is based on the following references:
mobilenet_quant_v1_224.tflite. but crash when i run my own model and give the error
label_image.cc
Quantizing a TensorFlow .pb model
Fixed Point Quantization
Using 8-bit calculations helps your models run faster and use less power.
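As a rough illustration of what fixed-point quantization does (my own sketch, not code from TensorFlow): each float tensor is mapped to uint8 values plus a scale and zero point, which is exactly what the --mean_values/--std_values and --default_ranges_min/--default_ranges_max flags used in the toco command further below parameterize.

import numpy as np

def quantize_uint8(x, range_min, range_max):
    # Affine 8-bit quantization of a float array over a fixed real-valued range.
    scale = (range_max - range_min) / 255.0
    zero_point = int(round(-range_min / scale))
    q = np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)
    return q, scale, zero_point

def dequantize_uint8(q, scale, zero_point):
    # Map the uint8 values back to approximate floats.
    return (q.astype(np.float32) - zero_point) * scale

x = np.array([0.0, 1.5, 3.0, 6.0], dtype=np.float32)
q, scale, zero_point = quantize_uint8(x, 0.0, 6.0)  # the 0..6 range matches a ReLU6 activation
print(q, dequantize_uint8(q, scale, zero_point))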
b. To generate the TFLite file, first get the TensorFlow source and install Bazel, then build TensorFlow from source.
Installing Bazel
Installing TensorFlow from source
bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
If your TensorFlow environment is already set up, the step above is all you need.
If the build fails, you can instead run the following commands to build only the executables needed for model conversion:
bazel build tensorflow/python/tools:freeze_graph
bazel build --config=opt tensorflow/contrib/lite/toco:toco
bazel build tensorflow/tools/graph_transforms:summarize_graph
bazel build tensorflow/tools/quantization:quantize_graph
Exporting the Inference Graph
Convert the model format
Saving a pb file in TensorFlow
TensorFlow Lite: compiling and generating a tflite file
TensorFlow Lite study notes 2: generating a TFLite model file
TensorFlow Lite notes
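The "save a pb file" step referenced above can also be done directly from Python with the standard TF 1.x utilities. Below is a minimal self-contained sketch that freezes a toy graph (the tiny placeholder graph and node names are mine; in practice the graph is the trained model):

import tensorflow as tf

# Build a tiny stand-in graph; in practice this is the trained network.
graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 3], name='input')
    w = tf.Variable(tf.ones([3, 2]), name='weights')
    y = tf.identity(tf.matmul(x, w), name='output')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Bake the variable values into constants so the GraphDef is self-contained.
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, graph.as_graph_def(), output_node_names=['output'])
        tf.train.write_graph(frozen_graph_def, '.', 'frozen_model.pb', as_text=False)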
How to quantize an existing pb model:
bazel-bin/tensorflow/tools/quantization/quantize_graph --input=/home/ljg/下載/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224_frozen.pb --output_node_names=MobilenetV1/Predictions/Reshape_1 --output=quantized_graph.pb --mode=eightbit
bazel-bin/tensorflow/contrib/lite/toco/toco --input_file=quantized_graph.pb --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE --output_file=quantized_graph.tflite --inference_type=QUANTIZED_UINT8 --inference_input_type=QUANTIZED_UINT8 --input_shapes=1,224,224,3 --input_arrays=input --output_arrays=MobilenetV1/Predictions/Reshape_1 --mean_values=128 --std_values=128 --default_ranges_min=0 --default_ranges_max=6
tensorflow lite: error when convert frozen model to lite format
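The toco command above also has a rough Python equivalent. The sketch below uses the TF 1.x contrib converter as I remember it (in later 1.x releases the class is named TFLiteConverter and some attribute names differ), so treat the exact names as an assumption to verify against your installed version:

import tensorflow as tf

# Rough Python counterpart of the toco command above (TF 1.x contrib API).
converter = tf.contrib.lite.TocoConverter.from_frozen_graph(
    'quantized_graph.pb',
    input_arrays=['input'],
    output_arrays=['MobilenetV1/Predictions/Reshape_1'],
    input_shapes={'input': [1, 224, 224, 3]})

converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8
converter.quantized_input_stats = {'input': (128, 128)}  # (mean, std), like --mean_values/--std_values
converter.default_ranges_stats = (0, 6)                  # like --default_ranges_min/--default_ranges_max

with open('quantized_graph.tflite', 'wb') as f:
    f.write(converter.convert())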