Preface: the shared_ptr smart pointer
To address memory leaks in C++, C++11 introduced smart pointers.
C++11 provides three smart pointers: std::shared_ptr, std::unique_ptr, and std::weak_ptr; to use them, include the header <memory>.
shared_ptr uses reference counting: every copy of a shared_ptr points to the same memory. Each copy increments the internal reference count by 1 and each destruction decrements it by 1; when the count reaches 0, the heap memory it points to is freed. The reference count inside shared_ptr is thread-safe, but reads and writes of the managed object itself still require locking.
#include "stdafx.h"
#include <iostream>
#include <future>
#include <thread>
using namespace std;
class Person
{
public:
Person(int v) {
value = v;
std::cout << "Cons" <<value<< std::endl;
}
~Person() {
std::cout << "Des" <<value<< std::endl;
}
int value;
};
int main()
{
    std::shared_ptr<Person> p1(new Person(1));          // Person(1) now has a reference count of 1
    std::shared_ptr<Person> p2 = std::make_shared<Person>(2);
    p1.reset(new Person(3));   // the new object is constructed first, then the old count drops to 0,
                               // so Person(1) is destroyed; finally p1 takes ownership of the new object
    std::shared_ptr<Person> p3 = p1;   // p1 and p3 both point to Person(3); its reference count is 2
    p1.reset();                // Person(3)'s reference count drops to 1
    p3.reset();                // Person(3)'s reference count drops to 0, so Person(3) is destroyed
    return 0;
}
Note that a raw pointer cannot be assigned directly to a smart pointer, as shown below: the shared_ptr constructor that takes a raw pointer is explicit, so the implicit conversion is not allowed.
std::shared_ptr<int> p4 = new int(1);// error
Getting the raw pointer
std::shared_ptr<int> p4(new int(5));
int *pInt = p4.get();
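To make the reference counting described above visible, std::shared_ptr exposes use_count(), which reports how many shared_ptr instances currently share ownership. A minimal sketch (not from the original post):

#include <iostream>
#include <memory>

int main()
{
    std::shared_ptr<int> a = std::make_shared<int>(42);
    std::cout << a.use_count() << std::endl;   // 1: only 'a' owns the int

    std::shared_ptr<int> b = a;                // copying increments the count
    std::cout << a.use_count() << std::endl;   // 2

    b.reset();                                 // 'b' releases its share
    std::cout << a.use_count() << std::endl;   // 1
    return 0;
}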
SyncedMemory: responsible for memory synchronization
size_ : the size of the storage, in bytes
head_ : the current state of the SyncedMemory, one of {UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED}
cpu_ptr_ : pointer to the CPU (host) buffer
gpu_ptr_ : pointer to the GPU (device) buffer
SyncedMemory(size_t size) only initializes size_; it does not allocate any memory!
to_cpu() / to_gpu() perform the actual allocation and synchronize the CPU and GPU copies (see the sketch below)
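The key idea is lazy, state-driven allocation: nothing is allocated until the data is first requested, and head_ records which side holds the freshest copy. The following is a simplified CPU-only sketch of that pattern, not the actual Caffe source; std::malloc/std::free stand in for Caffe's CaffeMallocHost and cudaMalloc:

#include <cstdlib>
#include <cstring>

// Simplified stand-in for caffe::SyncedMemory, CPU side only.
class SyncedMemorySketch {
 public:
  enum Head { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };

  explicit SyncedMemorySketch(size_t size)
      : cpu_ptr_(nullptr), size_(size), head_(UNINITIALIZED) {}  // no allocation here

  ~SyncedMemorySketch() { std::free(cpu_ptr_); }

  const void* cpu_data() { to_cpu(); return cpu_ptr_; }

  void* mutable_cpu_data() {
    to_cpu();
    head_ = HEAD_AT_CPU;          // caller may write, so the CPU now owns the freshest copy
    return cpu_ptr_;
  }

 private:
  void to_cpu() {
    if (head_ == UNINITIALIZED) {           // first touch: allocate and zero
      cpu_ptr_ = std::malloc(size_);
      std::memset(cpu_ptr_, 0, size_);
      head_ = HEAD_AT_CPU;
    }
    // in the real SyncedMemory, HEAD_AT_GPU would trigger a device-to-host copy here
  }

  void* cpu_ptr_;
  size_t size_;
  Head head_;
};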
Blob: the basic computational unit
Important members
// Important member functions
Blob(const int num, const int channels, const int height,const int width);
Reshape(const int num, const int channels, const int height,const int width);
const Dtype* cpu_data() const;
void set_cpu_data(Dtype* data);
const int* gpu_shape() const;
const Dtype* gpu_data() const;
const Dtype* cpu_diff() const;
const Dtype* gpu_diff() const;
Dtype* mutable_cpu_data();
Dtype* mutable_gpu_data();
Dtype* mutable_cpu_diff();
Dtype* mutable_gpu_diff();
void Update();
void FromProto(const BlobProto& proto, bool reshape = true);
/// @brief Compute the sum of absolute values (L1 norm) of the data.
Dtype asum_data() const;
/// @brief Compute the sum of absolute values (L1 norm) of the diff.
Dtype asum_diff() const;
/// @brief Compute the sum of squares (L2 norm squared) of the data.
Dtype sumsq_data() const;
/// @brief Compute the sum of squares (L2 norm squared) of the diff.
Dtype sumsq_diff() const;
// Important member variables
shared_ptr<SyncedMemory> data_;
shared_ptr<SyncedMemory> diff_;
shared_ptr<SyncedMemory> shape_data_;
vector<int> shape_;
int count_;
int capacity_;
The Blob(const int num, const int channels, const int height, const int width) constructor
template <typename Dtype>
Blob<Dtype>::Blob(const int num, const int channels, const int height,
    const int width)
  // capacity_ must be initialized before calling Reshape
  : capacity_(0) {
  Reshape(num, channels, height, width);
}
It calls void Reshape(const int num, const int channels, const int height, const int width);
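For reference, this 4-argument overload simply packs its arguments into a shape vector and delegates to the Reshape(const vector<int>&) overload analyzed next (paraphrased from the Caffe source; minor details may differ across versions):

template <typename Dtype>
void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
    const int width) {
  vector<int> shape(4);          // legacy 4D layout: (num, channels, height, width)
  shape[0] = num;
  shape[1] = channels;
  shape[2] = height;
  shape[3] = width;
  Reshape(shape);                // all the real work happens in the vector overload
}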
Reshape: the reshape function, responsible for registering the memory request
template <typename Dtype>
void Blob<Dtype>::Reshape(const vector<int>& shape) {
  CHECK_LE(shape.size(), kMaxBlobAxes);
  count_ = 1;
  shape_.resize(shape.size());
  if (!shape_data_ || shape_data_->size() < shape.size() * sizeof(int)) {
    shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int)));
  }
  int* shape_data = static_cast<int*>(shape_data_->mutable_cpu_data());
  for (int i = 0; i < shape.size(); ++i) {
    CHECK_GE(shape[i], 0);
    CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX";
    count_ *= shape[i];
    shape_[i] = shape[i];
    shape_data[i] = shape[i];
  }
  if (count_ > capacity_) {
    capacity_ = count_;
    data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
    diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
  }
}
Reshape uses SyncedMemory to register the memory request; the requested length is capacity_ * sizeof(Dtype), where capacity_ is set to count_ whenever count_ exceeds the current capacity. Note that this stage registers both data_ and diff_, i.e. memory for the forward results and for the gradients, but nothing is actually allocated yet: the head of each SyncedMemory remains in the UNINITIALIZED state.
Up to this point Caffe has performed no real memory allocation; it has only recorded the sizes it will need.
The important Blob member functions cpu_data(), mutable_cpu_data(), gpu_data(), mutable_gpu_data(), and so on are what actually allocate the memory and record where the data currently lives (CPU or GPU).
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_data() const {
  CHECK(data_);
  return (const Dtype*)data_->gpu_data();
}

template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_data() {
  CHECK(data_);
  return static_cast<Dtype*>(data_->mutable_gpu_data());
}
Read and write access is controlled by whether the returned pointer is const: the *_data() accessors return const pointers for read-only use, while the mutable_* accessors return writable pointers. A usage sketch follows.
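Putting the pieces together, a typical usage pattern looks like the following sketch (it assumes the standard Caffe headers; the function name blob_allocation_demo is hypothetical, and the comments describe when SyncedMemory actually allocates):

#include <caffe/blob.hpp>

using caffe::Blob;

void blob_allocation_demo() {
  Blob<float> blob(1, 3, 4, 5);            // Reshape only registers 60 floats for data_ and diff_;
                                           // both SyncedMemory objects stay UNINITIALIZED

  float* host = blob.mutable_cpu_data();   // first touch: SyncedMemory allocates host memory,
                                           // head_ becomes HEAD_AT_CPU
  host[0] = 1.0f;                          // writable because the pointer is non-const

  const float* ro = blob.cpu_data();       // read-only view of the same buffer, no new allocation
  (void)ro;

#ifndef CPU_ONLY
  const float* dev = blob.gpu_data();      // triggers device allocation plus a host-to-device copy;
                                           // head_ becomes SYNCED
  (void)dev;
#endif
}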
Next, consider Blob's Update(), which is used to update the network parameters; it is not implemented for int or unsigned int.
// The "update" method is used for parameter blobs in a Net, which are stored
// as Blob<float> or Blob<double> -- hence we do not define it for
// Blob<int> or Blob<unsigned int>.
template <> void Blob<unsigned int>::Update() { NOT_IMPLEMENTED; }
template <> void Blob<int>::Update() { NOT_IMPLEMENTED; }
template <typename Dtype>
void Blob<Dtype>::Update() {
  // We will perform update based on where the data is located.
  switch (data_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    // perform computation on CPU
    caffe_axpy<Dtype>(count_, Dtype(-1),
        static_cast<const Dtype*>(diff_->cpu_data()),
        static_cast<Dtype*>(data_->mutable_cpu_data()));
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    // perform computation on GPU
    caffe_gpu_axpy<Dtype>(count_, Dtype(-1),
        static_cast<const Dtype*>(diff_->gpu_data()),
        static_cast<Dtype*>(data_->mutable_gpu_data()));
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Syncedmem not initialized.";
  }
}
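caffe_axpy wraps the BLAS axpy routine, y = a*x + y, so with a = -1, x = diff_ and y = data_ the update is simply data_ -= diff_. A plain-loop equivalent of the CPU branch, as a sketch (the function name axpy_update_sketch is hypothetical):

// Equivalent of caffe_axpy<Dtype>(count_, Dtype(-1), diff, data):
// subtract the accumulated gradient from the parameter values in place.
template <typename Dtype>
void axpy_update_sketch(int count, const Dtype* diff, Dtype* data) {
  for (int i = 0; i < count; ++i) {
    data[i] += Dtype(-1) * diff[i];   // data = data - diff
  }
}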
Finally, consider void FromProto(const BlobProto& proto, bool reshape = true);
It loads a Blob that was previously serialized to disk, but only into CPU memory.
template <typename Dtype>
void Blob<Dtype>::FromProto(const BlobProto& proto, bool reshape) {
  if (reshape) {
    vector<int> shape;
    if (proto.has_num() || proto.has_channels() ||
        proto.has_height() || proto.has_width()) {
      // Using deprecated 4D Blob dimensions --
      // shape is (num, channels, height, width).
      shape.resize(4);
      shape[0] = proto.num();
      shape[1] = proto.channels();
      shape[2] = proto.height();
      shape[3] = proto.width();
    } else {
      shape.resize(proto.shape().dim_size());
      for (int i = 0; i < proto.shape().dim_size(); ++i) {
        shape[i] = proto.shape().dim(i);
      }
    }
    Reshape(shape);
  } else {
    CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)";
  }
  // copy data
  Dtype* data_vec = mutable_cpu_data();
  if (proto.double_data_size() > 0) {
    CHECK_EQ(count_, proto.double_data_size());
    for (int i = 0; i < count_; ++i) {
      data_vec[i] = proto.double_data(i);
    }
  } else {
    CHECK_EQ(count_, proto.data_size());
    for (int i = 0; i < count_; ++i) {
      data_vec[i] = proto.data(i);
    }
  }
  if (proto.double_diff_size() > 0) {
    CHECK_EQ(count_, proto.double_diff_size());
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.double_diff(i);
    }
  } else if (proto.diff_size() > 0) {
    CHECK_EQ(count_, proto.diff_size());
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);
    }
  }
}
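As a usage sketch, a BlobProto can be deserialized from a file and loaded into a Blob roughly like this (ReadProtoFromBinaryFileOrDie is Caffe's helper from caffe/util/io.hpp; the file name and the function name load_blob_demo are placeholders):

#include <caffe/blob.hpp>
#include <caffe/proto/caffe.pb.h>
#include <caffe/util/io.hpp>

using caffe::Blob;
using caffe::BlobProto;

void load_blob_demo() {
  BlobProto proto;
  // "mean.binaryproto" is a placeholder path; any serialized BlobProto works.
  caffe::ReadProtoFromBinaryFileOrDie("mean.binaryproto", &proto);

  Blob<float> blob;
  blob.FromProto(proto);        // reshape = true: the Blob adopts the proto's shape,
                                // then copies the data (and diff, if present) into CPU memory
}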
Layer
A Layer has at least one input Blob (the bottom Blob) and one output Blob (the top Blob); some layers also carry weights and biases. It supports two computation directions: forward and backward.
Important members
// Member functions
explicit Layer(const LayerParameter& param);
void SetUp(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top);
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top) = 0;
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top);
inline void Backward(const vector<Blob<Dtype>*>& top,const vector<bool>& propagate_down,const vector<Blob<Dtype>*>& bottom);
// Returns the vector of learnable parameter blobs.
vector<shared_ptr<Blob<Dtype> > >& blobs()
{
return blobs_;
}
//Returns the layer parameter.
const LayerParameter& layer_param() const { return layer_param_; }
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top) = 0;
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top)
{
// LOG(WARNING) << "Using CPU code as backup.";
return Forward_cpu(bottom, top);
}
// Member variables
/** The protobuf that stores the layer parameters */
LayerParameter layer_param_;
/** The phase: TRAIN or TEST */
Phase phase_;
/** The vector that stores the learnable parameters as a set of blobs. */
vector<shared_ptr<Blob<Dtype> > > blobs_;
/** Vector indicating whether to compute the diff of each param blob. */
vector<bool> param_propagate_down_;
/** The vector that indicates whether each top blob has a non-zero weight in
* the objective function. */
vector<Dtype> loss_;
explicit Layer(const LayerParameter& param);
The constructor loads the configuration from the LayerParameter, i.e. from the model; this is the step that allocates CPU memory, roughly as in the paraphrase below.
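Paraphrasing the Caffe constructor (details vary slightly across Caffe versions): it stores the LayerParameter, records the phase, and, if the parameter already contains serialized blobs (e.g. when loading a trained model), reconstructs them via FromProto, which is where the CPU memory gets allocated:

// Paraphrase of Layer's constructor.
explicit Layer(const LayerParameter& param)
  : layer_param_(param) {
  // Record the phase (TRAIN or TEST) and copy any serialized parameter blobs.
  phase_ = param.phase();
  if (layer_param_.blobs_size() > 0) {
    blobs_.resize(layer_param_.blobs_size());
    for (int i = 0; i < layer_param_.blobs_size(); ++i) {
      blobs_[i].reset(new Blob<Dtype>());
      blobs_[i]->FromProto(layer_param_.blobs(i));   // allocates CPU memory for the weights
    }
  }
}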
void SetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
/**
 * @brief Implements common layer setup functionality.
 *
 * @param bottom the preshaped input blobs
 * @param top
 *     the allocated but unshaped output blobs, to be shaped by Reshape
 *
 * Checks that the number of bottom and top blobs is correct.
 * Calls LayerSetUp to do special layer setup for individual layer types,
 * followed by Reshape to set up sizes of top blobs and internal buffers.
 * Sets up the loss weight multiplier blobs for any non-zero loss weights.
 * This method may not be overridden.
 */
void SetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top)
{
  InitMutex();
  CheckBlobCounts(bottom, top);
  LayerSetUp(bottom, top);
  Reshape(bottom, top);
  SetLossWeights(top);
}
/**
 * @brief Does layer-specific setup: your layer should implement this function
 *        as well as Reshape.
 *
 * @param bottom
 *     the preshaped input blobs, whose data fields store the input data for
 *     this layer
 * @param top
 *     the allocated but unshaped output blobs
 *
 * This method should do one-time layer specific setup. This includes reading
 * and processing relevant parameters from the <code>layer_param_</code>.
 * Setting up the shapes of top blobs and internal buffers should be done in
 * <code>Reshape</code>, which will be called before the forward pass to
 * adjust the top blob sizes.
 */
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top) {}
The Layer class itself is fairly simple: each concrete subclass implements the functionality of its particular layer type, as the sketch below illustrates.
Note in particular:
layer_param_ holds the protobuf object that stores the layer's parameters
blobs_ holds the layer's internal weights and bias terms
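To make the division of labour concrete, here is a minimal sketch of a hypothetical element-wise layer (the class name ScaleByTwoLayer and its behaviour are invented for illustration; GPU code paths, parameter messages in caffe.proto, and layer registration are omitted):

#include <vector>
#include <caffe/blob.hpp>
#include <caffe/layer.hpp>

namespace caffe {

// Hypothetical layer that multiplies its input by a fixed constant.
template <typename Dtype>
class ScaleByTwoLayer : public Layer<Dtype> {
 public:
  explicit ScaleByTwoLayer(const LayerParameter& param) : Layer<Dtype>(param) {}

  // One-time setup: nothing to read from layer_param_ in this toy example.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top) {}

  // Shape the top blob like the bottom blob (element-wise operation).
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top) {
    top[0]->ReshapeLike(*bottom[0]);
  }

  virtual inline const char* type() const { return "ScaleByTwo"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top) {
    const Dtype* in = bottom[0]->cpu_data();
    Dtype* out = top[0]->mutable_cpu_data();
    for (int i = 0; i < bottom[0]->count(); ++i) {
      out[i] = Dtype(2) * in[i];
    }
  }

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {
    if (propagate_down[0]) {
      const Dtype* top_diff = top[0]->cpu_diff();
      Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
      for (int i = 0; i < bottom[0]->count(); ++i) {
        bottom_diff[i] = Dtype(2) * top_diff[i];  // d(2x)/dx = 2
      }
    }
  }
};

}  // namespace caffe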