faster_rcnn_meta_arch.py
The model.py/DetectionModel class we looked at earlier is the base class of all detection models, and faster_rcnn_meta_arch.py/FasterRCNNMetaArch is a subclass of DetectionModel.
Two classes are defined in object_detection\meta_architectures\faster_rcnn_meta_arch.py.
First, look at the modules that faster_rcnn_meta_arch.py imports:
#faster_rcnn_meta_arch.py
from abc import abstractmethod
from functools import partial
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import box_predictor
from object_detection.core import losses
from object_detection.core import model
from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import ops
from object_detection.utils import shape_utils
These pull in the basic building blocks from core: anchor generation, box lists, losses, post-processing, target assignment, and so on.
The classes defined in faster_rcnn_meta_arch.py:
#faster_rcnn_meta_arch.py
class FasterRCNNFeatureExtractor(object):
"""Faster R-CNN Feature Extractor definition."""
.....
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
.....
There are only two classes here:
- FasterRCNNMetaArch(model.DetectionModel):
a subclass of DetectionModel that implements the whole Faster R-CNN pipeline:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
This class is the focus of the file.
- FasterRCNNFeatureExtractor(object):
a base class whose name says it all: feature extraction. It declares several methods without providing concrete implementations; the concrete detection models under object_detection\models implement them.
For example, object_detection\models\faster_rcnn_inception_resnet_v2_feature_extractor.py implements the concrete methods of FasterRCNNFeatureExtractor.
#faster_rcnn_inception_resnet_v2_feature_extractor.py
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_resnet_v2
slim = tf.contrib.slim
class FasterRCNNInceptionResnetV2FeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Faster R-CNN with Inception Resnet v2 feature extractor implementation."""
Here FasterRCNNInceptionResnetV2FeatureExtractor is one of the extractors used by model_builder.py; a sketch of that dispatch follows.
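model_builder.py keeps a name-to-class map and instantiates whichever extractor the config names. A hedged sketch of that dispatch (the map and helper names here are stand-ins of my own, not quoted from model_builder.py):
#illustrative dispatch sketch, names are hypothetical
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res

FEATURE_EXTRACTOR_CLASS_MAP = {
    'faster_rcnn_inception_resnet_v2':
        frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
}

def build_feature_extractor(type_name, is_training, first_stage_features_stride):
  # Look up the class registered under the config's type string and build it.
  feature_extractor_class = FEATURE_EXTRACTOR_CLASS_MAP[type_name]
  return feature_extractor_class(is_training, first_stage_features_stride)
Next, the full FasterRCNNFeatureExtractor listing: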
class FasterRCNNFeatureExtractor(object):
"""Faster R-CNN Feature Extractor definition."""
def __init__(self,
is_training,
first_stage_features_stride,
batch_norm_trainable=False,
reuse_weights=None,
weight_decay=0.0):
self._is_training = is_training
self._first_stage_features_stride = first_stage_features_stride
self._train_batch_norm = (batch_norm_trainable and is_training)
self._reuse_weights = reuse_weights
self._weight_decay = weight_decay
@abstractmethod
def preprocess(self, resized_inputs):
pass
def extract_proposal_features(self, preprocessed_inputs, scope):
with tf.variable_scope(scope, values=[preprocessed_inputs]):
return self._extract_proposal_features(preprocessed_inputs, scope)
@abstractmethod
def _extract_proposal_features(self, preprocessed_inputs, scope):
pass
def extract_box_classifier_features(self, proposal_feature_maps, scope):
with tf.variable_scope(scope, values=[proposal_feature_maps]):
return self._extract_box_classifier_features(proposal_feature_maps, scope)
@abstractmethod
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
pass
def restore_from_classification_checkpoint_fn(
self,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
variables_to_restore = {}
for variable in tf.global_variables():
for scope_name in [first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope]:
if variable.op.name.startswith(scope_name):
var_name = variable.op.name.replace(scope_name + '/', '')
variables_to_restore[var_name] = variable
return variables_to_restore
Analysis of FasterRCNNFeatureExtractor's methods (a toy subclass sketch follows this list):
- __init__() method:
Args:
is_training: whether to build the training version of the graph.
first_stage_features_stride: output stride of the extracted first-stage (RPN) feature map.
batch_norm_trainable: whether the batch norm parameters are trainable.
reuse_weights: whether to reuse variables.
weight_decay: weight decay for the feature extractor (default: 0.0).
- preprocess(self, resized_inputs) method:
Feature-extractor-specific preprocessing (e.g. scaling pixel values), applied to the already-resized images. Note that this is different from DetectionModel's preprocessing. It is an abstract method, implemented for example in FasterRCNNInceptionResnetV2FeatureExtractor; see the source reading notes, part 8 (faster_rcnn_inception_resnet_v2_feature_extractor.py).
- extract_proposal_features(self, preprocessed_inputs, scope) method:
Extracts the features used by the first-stage RPN.
This method extracts a feature map from the preprocessed images. The extracted feature map is fed to the region proposal network (RPN), which uses these features to predict proposals.
Args:
preprocessed_inputs: image tensor with shape=[batch, height, width, channels]
scope: name of the variable scope
Returns:
rpn_feature_map: the extracted feature map with shape=[batch, height, width, depth]
- extract_box_classifier_features(self, proposal_feature_maps, scope) method:
Extracts the feature maps used by the second-stage classifier.
Args:
proposal_feature_maps: [batch_size * self.max_num_proposals, crop_height, crop_width, depth], the proposal feature maps cropped to a fixed size
scope: name of the variable scope
Returns:
proposal_classifier_features: [batch_size * self.max_num_proposals, height, width, depth], classifier features for each proposal
- restore_from_classification_checkpoint_fn() method:
Args:
first_stage_feature_extractor_scope: scope name of the first-stage feature extractor.
second_stage_feature_extractor_scope: scope name of the second-stage feature extractor.
Returns:
A dict mapping variable names (to load from a checkpoint) to variables in the model graph. The scope prefix is stripped, so e.g. a graph variable 'FirstStageFeatureExtractor/InceptionResnetV2/Conv2d_1a_3x3/weights' would be restored from the checkpoint key 'InceptionResnetV2/Conv2d_1a_3x3/weights'.
Analysis of the FasterRCNNMetaArch class
This class is the heavyweight of faster_rcnn_meta_arch.py and the essence of the whole detection model: counting comments, its implementation runs to about 1400 lines of code. Take it slowly!
- The structure of FasterRCNNMetaArch
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
first_stage_only,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_positive_balance_fraction,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_balance_fraction,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16):
@property
def first_stage_feature_extractor_scope(self):
return 'FirstStageFeatureExtractor'
@property
def second_stage_feature_extractor_scope(self):
return 'SecondStageFeatureExtractor'
@property
def first_stage_box_predictor_scope(self):
return 'FirstStageBoxPredictor'
@property
def second_stage_box_predictor_scope(self):
return 'SecondStageBoxPredictor'
@property
def max_num_proposals(self):
if self._is_training and not self._hard_example_miner:
return self._second_stage_batch_size
return self._first_stage_max_proposals
def preprocess(self, inputs):
........
return self._feature_extractor.preprocess(resized_inputs)
def predict(self, preprocessed_inputs):
......
return prediction_dict
def _predict_second_stage(self, rpn_box_encodings,
rpn_objectness_predictions_with_background,
rpn_features_to_crop,
anchors,
image_shape):
.....
return prediction_dict
def _extract_rpn_feature_maps(self, preprocessed_inputs):
......
return (rpn_box_predictor_features, rpn_features_to_crop,
anchors, image_shape)
def _predict_rpn_proposals(self, rpn_box_predictor_features):
......
return (tf.squeeze(box_encodings, axis=2),
objectness_predictions_with_background)
def _remove_invalid_anchors_and_predictions(
self,
box_encodings,
objectness_predictions_with_background,
anchors_boxlist,
clip_window):
......
return (_batch_gather_kept_indices(box_encodings),
_batch_gather_kept_indices(objectness_predictions_with_background),
pruned_anchors_boxlist)
def _flatten_first_two_dimensions(self, inputs):
.......
return tf.reshape(inputs, flattened_shape)
def postprocess(self, prediction_dict):
with tf.name_scope('FirstStagePostprocessor'):
image_shape = prediction_dict['image_shape']
if self._first_stage_only:
proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn(
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
image_shape)
return {
'detection_boxes': proposal_boxes,
'detection_scores': proposal_scores,
'num_detections': tf.to_float(num_proposals)
}
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shape,
mask_predictions=mask_predictions)
return detections_dict
def _postprocess_rpn(self,
rpn_box_encodings_batch,
rpn_objectness_predictions_with_background_batch,
anchors,
image_shape):
......
return proposal_boxes, proposal_scores, num_proposals
def _unpad_proposals_and_sample_box_classifier_batch(
self,
proposal_boxes,
proposal_scores,
num_proposals,
groundtruth_boxlists,
groundtruth_classes_with_background_list):
.......
return (tf.stack(single_image_proposal_box_sample),
tf.stack(single_image_proposal_score_sample),
tf.stack(single_image_num_proposals_sample))
def _format_groundtruth_data(self, image_shape):
.....
return (groundtruth_boxlists, groundtruth_classes_with_background_list,
groundtruth_masks_list)
def _sample_box_classifier_minibatch(self,
proposal_boxlist,
groundtruth_boxlist,
groundtruth_classes_with_background):
.......
return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
def _compute_second_stage_input_feature_maps(self, features_to_crop,
proposal_boxes_normalized):
.......
return slim.max_pool2d(
cropped_regions,
[self._maxpool_kernel_size, self._maxpool_kernel_size],
stride=self._maxpool_stride)
def _postprocess_box_classifier(self,
refined_box_encodings,
class_predictions_with_background,
proposal_boxes,
num_proposals,
image_shape,
mask_predictions=None):
.......
return detections
def _batch_decode_boxes(self, box_encodings, anchor_boxes):
......
return tf.reshape(decoded_boxes.get(),
tf.stack([combined_shape[0], combined_shape[1],
num_classes, 4]))
def loss(self, prediction_dict, scope=None):
.......
return loss_dict
def _loss_rpn(self,
rpn_box_encodings,
rpn_objectness_predictions_with_background,
anchors,
groundtruth_boxlists,
groundtruth_classes_with_background_list):
.......
return loss_dict
def _loss_box_classifier(self,
refined_box_encodings,
class_predictions_with_background,
proposal_boxes,
num_proposals,
groundtruth_boxlists,
groundtruth_classes_with_background_list,
image_shape,
prediction_masks=None,
groundtruth_masks_list=None):
.......
return loss_dict
def _padded_batched_proposals_indicator(self,
num_proposals,
max_num_proposals):
......
return tf.greater(tiled_num_proposals, tiled_proposal_index)
def _unpad_proposals_and_apply_hard_mining(self,
proposal_boxlists,
second_stage_loc_losses,
second_stage_cls_losses,
num_proposals):
for (proposal_boxlist, single_image_loc_loss, single_image_cls_loss,
single_image_num_proposals) in zip(
proposal_boxlists,
tf.unstack(second_stage_loc_losses),
tf.unstack(second_stage_cls_losses),
tf.unstack(num_proposals)):
proposal_boxlist = box_list.BoxList(
tf.slice(proposal_boxlist.get(),
[0, 0], [single_image_num_proposals, -1]))
single_image_loc_loss = tf.slice(single_image_loc_loss,
[0], [single_image_num_proposals])
single_image_cls_loss = tf.slice(single_image_cls_loss,
[0], [single_image_num_proposals])
return self._hard_example_miner(
location_losses=tf.expand_dims(single_image_loc_loss, 0),
cls_losses=tf.expand_dims(single_image_cls_loss, 0),
decoded_boxlist_list=[proposal_boxlist])
def restore_map(self, from_detection_checkpoint=True):
.......
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore,
include_patterns=[self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope])
return {var.op.name: var for var in feature_extractor_variables}
Although there are this many methods, only a handful are meant to be called from outside.
- Main methods
class FasterRCNNMetaArch(model.DetectionModel):
"""Faster R-CNN Meta-architecture definition."""
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
first_stage_only,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_positive_balance_fraction,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_balance_fraction,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16):
@property
def first_stage_feature_extractor_scope(self):
return 'FirstStageFeatureExtractor'
@property
def second_stage_feature_extractor_scope(self):
return 'SecondStageFeatureExtractor'
@property
def first_stage_box_predictor_scope(self):
return 'FirstStageBoxPredictor'
@property
def second_stage_box_predictor_scope(self):
return 'SecondStageBoxPredictor'
@property
def max_num_proposals(self):
if self._is_training and not self._hard_example_miner:
return self._second_stage_batch_size
return self._first_stage_max_proposals
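# Note: at training time without a hard-example miner, proposals are sampled
# and padded to second_stage_batch_size, so that is the effective maximum;
# otherwise first_stage_max_proposals bounds the number of proposals.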
def preprocess(self, inputs):
........
return self._feature_extractor.preprocess(resized_inputs)
def predict(self, preprocessed_inputs):
......
return prediction_dict
def postprocess(self, prediction_dict):
with tf.name_scope('FirstStagePostprocessor'):
image_shape = prediction_dict['image_shape']
if self._first_stage_only:
proposal_boxes, proposal_scores, num_proposals = self._postprocess_rpn(
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'],
image_shape)
return {
'detection_boxes': proposal_boxes,
'detection_scores': proposal_scores,
'num_detections': tf.to_float(num_proposals)
}
with tf.name_scope('SecondStagePostprocessor'):
mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shape,
mask_predictions=mask_predictions)
return detections_dict
def loss(self, prediction_dict, scope=None):
.......
return loss_dict
def restore_map(self, from_detection_checkpoint=True):
.......
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore,
include_patterns=[self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope])
return {var.op.name: var for var in feature_extractor_variables}
Counting the base-class methods, this class really exposes only 7-8 methods to external callers, and the externally callable methods are the main ones:
0. max_num_proposals, 1. preprocess, 2. predict, 3. postprocess, 4. loss, 5. restore_map, 6. provide_groundtruth (base class), 7. groundtruth_lists (base class)
The overall flow:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
Here again is the flow diagram summarized by haixwang; keep that diagram in mind, since it is roughly the implementation flow of the FasterRCNNMetaArch class.
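As a rough sketch of how these public methods are driven at training time (the placeholder tensors and the already-built detection_model below are my own stand-ins for illustration, not code from the repository):
#hedged sketch, not from faster_rcnn_meta_arch.py
import tensorflow as tf

images = tf.placeholder(tf.float32, [1, None, None, 3])
groundtruth_boxes_list = [tf.placeholder(tf.float32, [None, 4])]    # [ymin, xmin, ymax, xmax]
groundtruth_classes_list = [tf.placeholder(tf.float32, [None, 90])] # one-hot, no background column

preprocessed_inputs = detection_model.preprocess(images)        # resize + extractor preprocessing
detection_model.provide_groundtruth(groundtruth_boxes_list,     # stash groundtruth for loss()
                                    groundtruth_classes_list)
prediction_dict = detection_model.predict(preprocessed_inputs)  # RPN + box classifier forward pass
loss_dict = detection_model.loss(prediction_dict)               # first- and second-stage losses
# At inference time, call detection_model.postprocess(prediction_dict) instead of loss().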
- The main methods, one by one
- __init__():
Parameters first:
Args:
is_training: whether to build the training version of the graph.
num_classes: number of classes. Does not include the background class.
image_resizer_fn: a callable that resizes an input image of shape [height, width, channels] and may change its spatial dimensions. See
builders/image_resizer_builder.py.
feature_extractor: a FasterRCNNFeatureExtractor object, i.e. a subclass of the FasterRCNNFeatureExtractor introduced above.
first_stage_only: whether to construct only the Region Proposal Network (RPN) part of the model.
first_stage_anchor_generator: an anchor_generator.AnchorGenerator object (note that currently we only support grid_anchor_generator.GridAnchorGenerator objects).
first_stage_atrous_rate: the atrous (dilated) convolution rate; 1 means an ordinary convolution. (This should typically be set to 1.)
first_stage_box_predictor_arg_scope: Slim arg_scope for the conv2d, separable_conv2d and fully_connected ops of the RPN box predictor.
first_stage_box_predictor_kernel_size: kernel size of the convolution applied before the RPN box prediction.
first_stage_box_predictor_depth: output depth of the convolution applied before the RPN box prediction.
first_stage_minibatch_size: the "batch size" used when computing the RPN's foreground/background objectness loss and location loss. This "batch size" refers to the number of anchors per image for which the loss functions are computed.
first_stage_positive_balance_fraction: fraction of positive samples among the sampled anchors.
first_stage_nms_score_threshold: score threshold for the first-stage non-max suppression, in [0, 1]. The recommended value for Faster R-CNN is 0.
first_stage_nms_iou_threshold: IOU threshold for the non-max suppression applied to the RPN-predicted boxes (any box whose IOU with a higher-scoring box exceeds the threshold is removed). See the small NMS example after this argument list.
first_stage_max_proposals: maximum number of boxes to keep after applying non-max suppression (NMS) to the boxes predicted by the region proposal network (RPN).
first_stage_localization_loss_weight: a float
first_stage_objectness_loss_weight: a float
initial_crop_size: the size to which proposals are cropped during ROI pooling.
maxpool_kernel_size: a single integer indicating the kernel size of the max pool op on the cropped feature map during ROI pooling.
maxpool_stride: a single integer indicating the stride of the max pool op on the cropped feature map during ROI pooling.
second_stage_mask_rcnn_box_predictor: Mask R-CNN box predictor to use for the second stage.
second_stage_batch_size: batch size used for the second-stage classification and refined-localization loss.
second_stage_balance_fraction: fraction of positive proposals sampled per image.
second_stage_non_max_suppression_fn: the non-max suppression function.
second_stage_score_conversion_fn: a nonlinear function that converts logits to probabilities.
second_stage_localization_loss_weight: scale factor for the second-stage localization loss.
second_stage_classification_loss_weight: scale factor for the second-stage classification loss.
second_stage_classification_loss: the classification loss object, either losses.WeightedSigmoidClassificationLoss or
losses.WeightedSoftmaxClassificationLoss.
second_stage_mask_prediction_loss_weight: loss weight for the mask prediction.
hard_example_miner: a losses.HardExampleMiner object (can be None).
parallel_iterations: (optional) the number of iterations allowed to run in parallel for calls to tf.map_fn.
Raises:
ValueError: if `second_stage_batch_size` > `first_stage_max_proposals` at training time.
ValueError: if first_stage_anchor_generator is not of type grid_anchor_generator.GridAnchorGenerator.
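The three first-stage NMS arguments are easiest to see on a tiny example. The meta-architecture itself uses object_detection.core.post_processing, but the semantics match TensorFlow's stock op; the numbers below are made up:
#hedged NMS example, values invented for illustration
import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 0.50, 0.5],   # box 0, score 0.9
                     [0.0, 0.0, 0.48, 0.5],   # box 1, IOU ~0.96 with box 0
                     [0.5, 0.5, 1.00, 1.0]])  # box 2, no overlap
scores = tf.constant([0.9, 0.8, 0.05])

# max_output_size plays the role of first_stage_max_proposals and iou_threshold
# that of first_stage_nms_iou_threshold; boxes scoring below
# first_stage_nms_score_threshold would be filtered out beforehand.
keep = tf.image.non_max_suppression(boxes, scores,
                                    max_output_size=300,
                                    iou_threshold=0.7)

with tf.Session() as sess:
  print(sess.run(keep))  # [0 2]: box 1 is suppressed by the higher-scoring box 0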
Explanation of the __init__ function
def __init__(self,
is_training,
num_classes,
image_resizer_fn,
feature_extractor,
first_stage_only,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_positive_balance_fraction,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
second_stage_mask_rcnn_box_predictor,
second_stage_batch_size,
second_stage_balance_fraction,
second_stage_non_max_suppression_fn,
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
second_stage_mask_prediction_loss_weight=1.0,
hard_example_miner=None,
parallel_iterations=16):
super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)
# Validate the arguments
if is_training and second_stage_batch_size > first_stage_max_proposals:
raise ValueError('second_stage_batch_size should be no greater than '
'first_stage_max_proposals.')
if not isinstance(first_stage_anchor_generator,
grid_anchor_generator.GridAnchorGenerator):
raise ValueError('first_stage_anchor_generator must be of type '
'grid_anchor_generator.GridAnchorGenerator.')
# Store the configuration arguments
self._is_training = is_training
self._image_resizer_fn = image_resizer_fn  # image resize function
self._feature_extractor = feature_extractor  # feature extractor, introduced above
self._first_stage_only = first_stage_only  # whether to build only the proposal (RPN) stage
# The first class (index 0) is reserved as background.
unmatched_cls_target = tf.constant(
[1] + self._num_classes * [0], dtype=tf.float32)
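# e.g. with num_classes = 3 this constant is [1, 0, 0, 0]: a one-hot
# "background" row that the detector target assigner uses for proposals
# that match no groundtruth box.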
# target_assigner creates the assigners that match anchors/proposals to groundtruth
self._proposal_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN', 'proposal')
self._detector_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN', 'detection', unmatched_cls_target=unmatched_cls_target)
# Both proposal and detector target assigners use the same box coder
self._box_coder = self._proposal_target_assigner.box_coder
# (First stage) Region proposal network parameters
# the first-stage anchor generator
self._first_stage_anchor_generator = first_stage_anchor_generator
self._first_stage_atrous_rate = first_stage_atrous_rate
self._first_stage_box_predictor_arg_scope = (
first_stage_box_predictor_arg_scope)
self._first_stage_box_predictor_kernel_size = (
first_stage_box_predictor_kernel_size)
self._first_stage_box_predictor_depth = first_stage_box_predictor_depth
self._first_stage_minibatch_size = first_stage_minibatch_size
# sampler that balances positive/negative anchors
self._first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=first_stage_positive_balance_fraction)
self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor(
self._is_training, num_classes=1,
conv_hyperparams=self._first_stage_box_predictor_arg_scope,
min_depth=0, max_depth=0, num_layers_before_predictor=0,
use_dropout=False, dropout_keep_prob=1.0, kernel_size=1,
box_code_size=self._box_coder.code_size)
# first-stage NMS score threshold, IOU threshold, and maximum number of proposals
self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
self._first_stage_max_proposals = first_stage_max_proposals
# first-stage losses: WeightedSmoothL1LocalizationLoss and WeightedSoftmaxClassificationLoss
self._first_stage_localization_loss = (
losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True))
self._first_stage_objectness_loss = (
losses.WeightedSoftmaxClassificationLoss(anchorwise_output=True))
self._first_stage_loc_loss_weight = first_stage_localization_loss_weight
self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight
# Per-region cropping parameters
# ROI cropping/pooling sizes
self._initial_crop_size = initial_crop_size
self._maxpool_kernel_size = maxpool_kernel_size
self._maxpool_stride = maxpool_stride
self._mask_rcnn_box_predictor = second_stage_mask_rcnn_box_predictor
# second-stage parameters
self._second_stage_batch_size = second_stage_batch_size
self._second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=second_stage_balance_fraction)
# second-stage NMS and score conversion functions
self._second_stage_nms_fn = second_stage_non_max_suppression_fn
self._second_stage_score_conversion_fn = second_stage_score_conversion_fn
# second-stage losses
self._second_stage_localization_loss = (
losses.WeightedSmoothL1LocalizationLoss(anchorwise_output=True))
self._second_stage_classification_loss = second_stage_classification_loss
self._second_stage_mask_loss = (
losses.WeightedSigmoidClassificationLoss(anchorwise_output=True))
self._second_stage_loc_loss_weight = second_stage_localization_loss_weight
self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
self._second_stage_mask_loss_weight = (
second_stage_mask_prediction_loss_weight)
self._hard_example_miner = hard_example_miner
self._parallel_iterations = parallel_iterations
To keep this post from getting uncomfortably long, it is split into two parts. See the next post, object_detection API source reading notes (7: a detailed look at the FasterRCNNMetaArch class).