A detailed walkthrough of FasterRCNNMetaArch:
As mentioned in the previous post, the init function is essentially just extraction and storage of its arguments, as shown below:
- init()
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
first_stage_only,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_positive_balance_fraction,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_balance_fraction,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16):
super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)
# Sanity-check the arguments
if is_training and second_stage_batch_size > first_stage_max_proposals:
raise ValueError('second_stage_batch_size should be no greater than '
'first_stage_max_proposals.')
if not isinstance(first_stage_anchor_generator,
grid_anchor_generator.GridAnchorGenerator):
raise ValueError('first_stage_anchor_generator must be of type '
'grid_anchor_generator.GridAnchorGenerator.')
# Store the configuration arguments
self._is_training = is_training
self._image_resizer_fn = image_resizer_fn  # image resize function
self._feature_extractor = feature_extractor  # feature extractor, introduced above
self._first_stage_only = first_stage_only  # whether to run only the region proposal stage
# The first class is reserved as background.
unmatched_cls_target = tf.constant(
[1] + self._num_classes * [0], dtype=tf.float32)
# target_assigner builds the target assigners for the two stages
self._proposal_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN', 'proposal')
self._detector_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN', 'detection', unmatched_cls_target=unmatched_cls_target)
# Both proposal and detector target assigners use the same box coder
self._box_coder = self._proposal_target_assigner.box_coder
# (First stage) Region proposal network parameters
# The first-stage anchor generator
self._first_stage_anchor_generator = first_stage_anchor_generator
self._first_stage_atrous_rate = first_stage_atrous_rate
self._first_stage_box_predictor_arg_scope = (
first_stage_box_predictor_arg_scope)
self._first_stage_box_predictor_kernel_size = (
first_stage_box_predictor_kernel_size)
self._first_stage_box_predictor_depth = first_stage_box_predictor_depth
self._first_stage_minibatch_size = first_stage_minibatch_size
# Sampler that balances positive and negative anchors in the RPN minibatch
self._first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=first_stage_positive_balance_fraction)
self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor(
self._is_training, num_classes=1,
conv_hyperparams=self._first_stage_box_predictor_arg_scope,
min_depth=0, max_depth=0, num_layers_before_predictor=0,
use_dropout=False, dropout_keep_prob=1.0, kernel_size=1,
box_code_size=self._box_coder.code_size)
# First-stage NMS score threshold, IoU threshold and maximum number of proposals
self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
self._first_stage_max_proposals = first_stage_max_proposals
# First-stage losses: WeightedSmoothL1LocalizationLoss and WeightedSoftmaxClassificationLoss
self._first_stage_localization_loss = (
losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True))
self._first_stage_objectness_loss = (
losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True))
self._first_stage_loc_loss_weight = first_stage_localization_loss_weight
self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight
# Per-region cropping (ROI) parameters: crop size and max-pool settings
self._initial_crop_size = initial_crop_size
self._maxpool_kernel_size = maxpool_kernel_size
self._maxpool_stride = maxpool_stride
self._mask_rcnn_box_predictor = second_stage_mask_rcnn_box_predictor
# Second-stage (box classifier) parameters
self._second_stage_batch_size = second_stage_batch_size
self._second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=second_stage_balance_fraction)
# Second-stage non-max suppression and score conversion functions
self._second_stage_nms_fn = second_stage_non_max_suppression_fn
self._second_stage_score_conversion_fn = second_stage_score_conversion_fn
# Second-stage losses
self._second_stage_localization_loss = (
losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True))
self._second_stage_classification_loss = second_stage_classification_loss
self._second_stage_mask_loss = (
losses.WeightedSigmoidClassificationLoss(anchorwise_output=True))
self._second_stage_loc_loss_weight = second_stage_localization_loss_weight
self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
self._second_stage_mask_loss_weight = (
second_stage_mask_prediction_loss_weight)
self._hard_example_miner = hard_example_miner
self._parallel_iterations = parallel_iterations
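As a quick illustration of the background convention above, here is a minimal sketch (my own, with an example num_classes that is not from the source) of what unmatched_cls_target evaluates to:
import tensorflow as tf

num_classes = 3  # example value for illustration only
unmatched_cls_target = tf.constant([1] + num_classes * [0], dtype=tf.float32)
# => [1., 0., 0., 0.]: index 0 is the background class, so an anchor or proposal
#    that matches no groundtruth box is assigned this one-hot "background" target.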
- Internal properties of FasterRCNNMetaArch
@property
def first_stage_feature_extractor_scope(self):
return 'FirstStageFeatureExtractor'
@property
def second_stage_feature_extractor_scope(self):
return 'SecondStageFeatureExtractor'
@property
def first_stage_box_predictor_scope(self):
return 'FirstStageBoxPredictor'
@property
def second_stage_box_predictor_scope(self):
return 'SecondStageBoxPredictor'
@property
def max_num_proposals(self):
if self._is_training and not self._hard_example_miner:
return self._second_stage_batch_size
return self._first_stage_max_proposals
max_num_proposals is the maximum number of proposals per image in the batch. At training time, if no hard-example miner is configured, it returns second_stage_batch_size; otherwise it returns first_stage_max_proposals. At inference time it always returns first_stage_max_proposals.
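A small standalone sketch of that branching logic (with made-up sizes), mirroring the property above:
second_stage_batch_size = 64     # example value only
first_stage_max_proposals = 300  # example value only

def max_num_proposals(is_training, has_hard_example_miner):
    # Training without a hard-example miner: proposals are sampled down to the
    # second-stage batch size; otherwise the full first-stage budget is kept.
    if is_training and not has_hard_example_miner:
        return second_stage_batch_size
    return first_stage_max_proposals

print(max_num_proposals(True, False))   # 64  (training, no miner)
print(max_num_proposals(True, True))    # 300 (training with a miner)
print(max_num_proposals(False, False))  # 300 (inference)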
- preprocess(self, inputs)
def preprocess(self, inputs):
if inputs.dtype is not tf.float32:
raise ValueError('`preprocess` expects a tf.float32 tensor')
with tf.name_scope('Preprocessor'):
resized_inputs = tf.map_fn(self._image_resizer_fn,
elems=inputs,
dtype=tf.float32,
parallel_iterations=self._parallel_iterations)
return self._feature_extractor.preprocess(resized_inputs)
This calls FasterRCNNFeatureExtractor.preprocess(), which performs the extra, extractor-specific preprocessing (for example scaling pixel values into [-1, 1]); nothing fancy, really. See the object_detection API source-reading notes, part 8 (faster_rcnn_inception_resnet_v2_feature_extractor.py).
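For reference, a rough sketch of the kind of scaling that preprocess does for an Inception-style feature extractor; the exact formula depends on the extractor, so treat this as an assumption rather than the actual implementation:
import tensorflow as tf

def preprocess_sketch(resized_inputs):
    # Map [0, 255] pixel values into the [-1, 1] range the backbone expects.
    return (2.0 / 255.0) * resized_inputs - 1.0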
- predict(self, preprocessed_inputs)
def predict(self, preprocessed_inputs):
(rpn_box_predictor_features, rpn_features_to_crop, anchors_boxlist,
image_shape) = self._extract_rpn_feature_maps(preprocessed_inputs)
(rpn_box_encodings, rpn_objectness_predictions_with_background
) = self._predict_rpn_proposals(rpn_box_predictor_features)
# The Faster R-CNN paper recommends pruning anchors that venture outside
# the image window at training time and clipping at inference time.
clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]]))
if self._is_training:
(rpn_box_encodings, rpn_objectness_predictions_with_background,
anchors_boxlist) = self._remove_invalid_anchors_and_predictions(
rpn_box_encodings, rpn_objectness_predictions_with_background,
anchors_boxlist, clip_window)
else:
anchors_boxlist = box_list_ops.clip_to_window(
anchors_boxlist, clip_window)
anchors = anchors_boxlist.get()
prediction_dict = {
'rpn_box_predictor_features': rpn_box_predictor_features,
'rpn_features_to_crop': rpn_features_to_crop,
'image_shape': image_shape,
'rpn_box_encodings': rpn_box_encodings,
'rpn_objectness_predictions_with_background':
rpn_objectness_predictions_with_background,
'anchors': anchors
}
if not self._first_stage_only:
prediction_dict.update(self._predict_second_stage(
rpn_box_encodings,
rpn_objectness_predictions_with_background,
rpn_features_to_crop,
anchors, image_shape))
return prediction_dict
This function runs the forward pass on the preprocessed images and produces the most "raw" predictions. If first_stage_only is set to True, it only outputs the (un-postprocessed) RPN predictions; otherwise it outputs both the first-stage RPN predictions and the second-stage box classifier predictions.
Other points worth noting:
+ Anchor pruning vs. clipping: following the Faster R-CNN paper, anchors whose boundaries extend beyond the image are removed (pruned) at training time, while at inference (prediction) time they are merely clipped back to the image window (see the sketch after this list).
+ Proposal padding: the number of proposals for every image is padded up to self.max_num_proposals (during training, when there are not enough positive samples, negatives are used to fill the quota; for example, if self.max_num_proposals == 128, positives plus negatives must add up to 128), so every image in the batch contributes the same number of proposals.
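Here is a small numpy sketch (mine, not the library code) of the pruning vs. clipping behaviour described in the first point, with boxes given as [ymin, xmin, ymax, xmax]:
import numpy as np

def prune_anchors(anchors, window):
    # Training behaviour: keep only anchors that lie completely inside the window.
    ymin, xmin, ymax, xmax = window
    keep = ((anchors[:, 0] >= ymin) & (anchors[:, 1] >= xmin) &
            (anchors[:, 2] <= ymax) & (anchors[:, 3] <= xmax))
    return anchors[keep]

def clip_anchors(anchors, window):
    # Inference behaviour: keep every anchor but cut it back to the window.
    ymin, xmin, ymax, xmax = window
    clipped = anchors.copy()
    clipped[:, [0, 2]] = np.clip(clipped[:, [0, 2]], ymin, ymax)
    clipped[:, [1, 3]] = np.clip(clipped[:, [1, 3]], xmin, xmax)
    return clipped

anchors = np.array([[-10., -10., 50., 50.],   # sticks out of the image
                    [ 20.,  20., 80., 80.]])  # fully inside
window = (0., 0., 100., 100.)
print(prune_anchors(anchors, window))  # only the second anchor survives
print(clip_anchors(anchors, window))   # both survive; the first becomes [0, 0, 50, 50]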
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor holding images that have already been run through preprocess().
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) rpn_box_predictor_features: shape = [batch_size, height, width, depth], the feature map fed to the RPN box predictor; it is used to predict proposal boxes and the corresponding objectness scores (foreground vs. background, so only two classes).
2) rpn_features_to_crop: shape = [batch_size, height, width, depth], the feature map from which a fixed-size region is cropped for every proposal, regardless of the proposal's size.
3) image_shape: a 1-D tensor representing the input image shape.
4) rpn_box_encodings: shape = [batch_size, num_anchors, self._box_coder.code_size], the predicted box encodings (offsets relative to the anchors).
5) rpn_objectness_predictions_with_background: shape = [batch_size, num_anchors, 2], per-anchor class logits, with the background prediction at class index 0.
6) anchors: shape = [num_anchors, 4], the first-stage RPN anchors in absolute coordinates. num_anchors can differ between training and inference.
--------------------------------------------------------------------------
The following entries are only returned when the second stage is run.
7) refined_box_encodings: shape = [total_num_proposals, num_classes, 4], the refined box encodings, where total_num_proposals = batch_size * self.max_num_proposals.
8) class_predictions_with_background: shape = [total_num_proposals, num_classes + 1], the per-box class predictions, with the background class at index 0; total_num_proposals = batch_size * self.max_num_proposals.
9) num_proposals: the number of proposals produced for each image, at most self.max_num_proposals.
10) proposal_boxes: shape = [batch_size, self.max_num_proposals, 4], the decoded proposal boxes in absolute coordinates.
11) mask_predictions: (optional) shape = [total_num_padded_proposals, num_classes, mask_height, mask_width], the predicted instance masks.
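To make the flattened total_num_proposals dimension concrete, here is a small sketch with made-up sizes (not from the source):
import numpy as np

batch_size, max_num_proposals, num_classes = 2, 64, 90   # example values only
total_num_proposals = batch_size * max_num_proposals     # 128

# refined_box_encodings is returned flattened over the batch ...
refined_box_encodings = np.zeros((total_num_proposals, num_classes, 4))
# ... and can be viewed per image by unflattening the first dimension.
per_image = refined_box_encodings.reshape(batch_size, max_num_proposals, num_classes, 4)
print(per_image.shape)  # (2, 64, 90, 4)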
- postprocess(self, prediction_dict)
def postprocess(self, prediction_dict):
with tf.name_scope('FirstStagePostprocessor'):
image_shape = prediction_dict['image_shape']
if self._first_stage_only:
proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn(
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
image_shape)
return {
'detection_boxes': proposal_boxes,
'detection_scores': proposal_scores,
'num_detections': tf.to_float(num_proposals)
}
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shape,
mask_predictions=mask_predictions)
return detections_dict
This function converts the raw prediction outputs into the final detection results. The prediction scores start out as logits and are converted by the score conversion function. When first_stage_only=True, the returned detections come from the first-stage RPN (self.max_num_proposals boxes per image); otherwise they come from the full two-stage pipeline (at most max_detections boxes per image, as configured for the second-stage NMS) and are converted into multiclass detections (see the score-conversion sketch after the Returns list below).
Args:
prediction_dict: a dictionary holding all of the prediction tensors. When first_stage_only=True it contains rpn_box_encodings, rpn_objectness_predictions_with_background, rpn_features_to_crop, image_shape and anchors; otherwise the dictionary additionally contains refined_box_encodings, class_predictions_with_background, num_proposals, proposal_boxes and, optionally, mask_predictions.
Returns:
detections: a dictionary containing the following fields
detection_boxes: [batch, max_detections, 4], the detection box coordinates
detection_scores: [batch, max_detections], the detection scores
detection_classes: [batch, max_detections], the detection classes (only created when rpn_mode=False)
num_detections: [batch], the number of valid detections per image
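As a side note on the score conversion mentioned above, a minimal numpy sketch of what a softmax score converter does to the class logits (assuming the score converter is configured as a softmax, which is the common choice):
import numpy as np

def softmax(logits):
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

# One proposal: background logit first, then two object classes (made-up numbers).
class_predictions_with_background = np.array([[0.1, 2.0, 0.5]])
scores = softmax(class_predictions_with_background)
print(scores)         # ~[[0.11, 0.73, 0.16]], normalized per proposal
print(scores[:, 1:])  # the background column (index 0) is dropped for the final detections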
- loss(self, prediction_dict, scope=None)
def loss(self, prediction_dict, scope=None):
with tf.name_scope(scope, 'Loss', prediction_dict.values()):
(groundtruth_boxlists, groundtruth_classes_with_background_list,
groundtruth_masks_list
) = self._format_groundtruth_data(prediction_dict['image_shape'])
loss_dict = self._loss_rpn(
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
groundtruth_boxlists,
groundtruth_classes_with_background_list)
if not self._first_stage_only:
loss_dict.update(
self._loss_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
groundtruth_boxlists,
groundtruth_classes_with_background_list,
prediction_dict['image_shape'],
prediction_dict.get('mask_predictions'),
groundtruth_masks_list,
))
return loss_dict
If first_stage_only=True, only the RPN losses (rpn_localization_loss and rpn_objectness_loss) are computed; otherwise all of the losses are computed.
Args:
prediction_dict: a dictionary holding all of the prediction tensors. When first_stage_only=True it contains rpn_box_encodings, rpn_objectness_predictions_with_background, rpn_features_to_crop, image_shape and anchors; otherwise the dictionary additionally contains refined_box_encodings, class_predictions_with_background, num_proposals, proposal_boxes and, optionally, mask_predictions.
scope: an optional scope name for the loss ops.
Returns:
a dictionary mapping loss names to scalar loss values: first_stage_localization_loss, first_stage_objectness_loss, second_stage_localization_loss and second_stage_classification_loss.
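A typical way to consume the returned dictionary in a training loop is simply to sum its entries into one scalar; a sketch with placeholder values (not from the source):
import tensorflow as tf

loss_dict = {
    'first_stage_localization_loss': tf.constant(0.8),
    'first_stage_objectness_loss': tf.constant(0.3),
    'second_stage_localization_loss': tf.constant(0.5),
    'second_stage_classification_loss': tf.constant(0.4),
}
total_loss = tf.add_n(list(loss_dict.values()))  # the scalar handed to the optimizer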
- restore_map(self, from_detection_checkpoint=True)
def restore_map(self, from_detection_checkpoint=True):
if not from_detection_checkpoint:
return self._feature_extractor.restore_from_classification_checkpoint_fn(
self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope)
variables_to_restore = tf.global_variables()
variables_to_restore.append(slim.get_or_create_global_step())
# Only load feature extractor variables to be consistent with loading from
# a classification checkpoint.
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore,
include_patterns=[self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope])
return {var.op.name: var for var in feature_extractor_variables}
Restores variables from an external checkpoint.
Args:
from_detection_checkpoint: whether to restore from a full detection-model checkpoint or, instead, from a classification-model checkpoint used to initialize (pre-train) the feature extractor.
Returns:
a dictionary mapping the names of the variables to restore to the corresponding variables.
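A sketch of how the returned map is typically used to initialize training from a classification checkpoint; model, checkpoint_path and sess below are assumptions for illustration, not part of this file:
import tensorflow as tf

def restore_pretrained_feature_extractor(model, checkpoint_path, sess):
    # `model` is a built FasterRCNNMetaArch and `checkpoint_path` a classification
    # checkpoint (both hypothetical here); only feature-extractor weights are loaded.
    var_map = model.restore_map(from_detection_checkpoint=False)
    init_saver = tf.train.Saver(var_map)
    init_saver.restore(sess, checkpoint_path)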