faster_rcnn_inception_resnet_v2_feature_extractor.py
在object_detection\models\faster_rcnn_inception_resnet_v2_feature_extractor.py
文件中只有一個類,那就是FasterRCNNInceptionResnetV2FeatureExtractor,
這是FasterRCNNFeatureExtractor
的子類。
對于不同的CNN基礎模型,object_detection\models\
下面有對應的feature_extractor實現。
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_resnet_v2
slim = tf.contrib.slim
class FasterRCNNInceptionResnetV2FeatureExtractor(
        faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
    """Faster R-CNN with Inception Resnet v2 feature extractor implementation."""
介紹FasterRCNNInceptionResnetV2FeatureExtractor的方法
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_resnet_v2
slim = tf.contrib.slim
class FasterRCNNInceptionResnetV2FeatureExtractor(
        faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
    """Faster R-CNN with Inception Resnet v2 feature extractor implementation."""

    # Overview of the class: method bodies marked with `...` are elided here.

    def __init__(self,
                 is_training,
                 first_stage_features_stride,
                 batch_norm_trainable=False,
                 reuse_weights=None,
                 weight_decay=0.0):
        # Only strides of 8 and 16 are accepted; anything else is rejected
        # before the base class is initialised.
        if first_stage_features_stride != 8 and first_stage_features_stride != 16:
            raise ValueError('`first_stage_features_stride` must be 8 or 16.')
        super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__(
            is_training, first_stage_features_stride, batch_norm_trainable,
            reuse_weights, weight_decay)

    def preprocess(self, resized_inputs):
        # Linear rescale: 0 maps to -1.0 and 255 maps to 1.0.
        return (2.0 / 255.0) * resized_inputs - 1.0

    def _extract_proposal_features(self, preprocessed_inputs, scope):
        ...  # body elided in this overview
        return rpn_feature_map

    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        ...  # body elided in this overview
        return proposal_classifier_features

    def restore_from_classification_checkpoint_fn(
            self,
            first_stage_feature_extractor_scope,
            second_stage_feature_extractor_scope):
        ...  # body elided in this overview
        return variables_to_restore
從上面可以看出FasterRCNNInceptionResnetV2FeatureExtractor把四個抽象方法實現了,這些方法在object_detectionAPI源碼閱讀筆記(6-faster_rcnn_meta_arch.py) 中有提到。
下面就具體介紹。
- __init__()
def __init__(self,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
    """Constructor.

    Args:
        is_training: See base class.
        first_stage_features_stride: See base class.
        batch_norm_trainable: See base class.
        reuse_weights: See base class.
        weight_decay: See base class.

    Raises:
        ValueError: If `first_stage_features_stride` is not 8 or 16.
    """
    # Validate before delegating so an unsupported stride never reaches the
    # base-class constructor.
    if first_stage_features_stride not in (8, 16):
        raise ValueError('`first_stage_features_stride` must be 8 or 16.')
    super(FasterRCNNInceptionResnetV2FeatureExtractor, self).__init__(
        is_training, first_stage_features_stride, batch_norm_trainable,
        reuse_weights, weight_decay)
初始化函數就是把FasterRCNNInceptionResnetV2FeatureExtractor和它的基類FasterRCNNFeatureExtractor進行初始化,并且first_stage_features_stride(第一階段特征圖的步長)必須是8或16,其他取值會拋出ValueError。
- preprocess(self, resized_inputs):
def preprocess(self, resized_inputs):
    """Rescale pixel values linearly so that 0 maps to -1.0 and 255 to 1.0.

    Args:
        resized_inputs: A [batch, height_in, width_in, channels] float32
            tensor representing a batch of images with values between 0 and
            255.0.

    Returns:
        preprocessed_inputs: A [batch, height_out, width_out, channels]
            float32 tensor representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0
這是Faster R-CNN with Inception Resnet v2 的預處理函數,將像素值映射到[-1, 1]范圍(歸一化)。大概就是: resized_inputs = (resized_inputs / 255)*2 - 1這樣算的。這樣就到[-1,1]了。
- _extract_proposal_features()
def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extract the first-stage feature map that is handed to the RPN.

    Runs the Inception Resnet v2 base network up to the 'PreAuxLogits'
    endpoint.

    Args:
        preprocessed_inputs: A [batch, height, width, channels] tensor of
            already-normalised images.
        scope: A scope name. NOTE(review): the value passed in is not used;
            it is rebound by the `with tf.variable_scope(...) as scope`
            statement below.

    Returns:
        rpn_feature_map: A [batch, height, width, depth] tensor of features
            to be consumed by the RPN.

    Raises:
        ValueError: If `preprocessed_inputs` is not 4-dimensional.
    """
    if len(preprocessed_inputs.get_shape().as_list()) != 4:
        raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a '
                         'tensor of shape %s' % preprocessed_inputs.get_shape())

    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
            weight_decay=self._weight_decay)):
        # Batch norm runs in training mode only when self._train_batch_norm
        # is truthy (that attribute is set outside this snippet).
        with slim.arg_scope([slim.batch_norm],
                            is_training=self._train_batch_norm):
            # reuse=self._reuse_weights lets variables created under this
            # variable scope be shared instead of re-created.
            with tf.variable_scope('InceptionResnetV2',
                                   reuse=self._reuse_weights) as scope:
                rpn_feature_map, _ = (
                    inception_resnet_v2.inception_resnet_v2_base(
                        preprocessed_inputs, final_endpoint='PreAuxLogits',
                        scope=scope,
                        output_stride=self._first_stage_features_stride,
                        align_feature_maps=True))
    return rpn_feature_map
這是提取第一階段將用于RPN的特征,返回feature map。實現faster_rcnn_meta_arch中的抽象方法,使用Inception Resnet v2網絡的前半部分提取特征[將用于RPN的特征]。
如果在align_feature_maps = True
模式下構建網絡,卷積的VALID變成SAME模式,以便特征映射對齊。
- _extract_box_classifier_features():
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extract the second-stage features used by the box classifier.

    Rebuilds the "second half" of Inception ResNet v2, i.e. the layers that
    come after the part built in `_extract_proposal_features`: the
    'Mixed_7a' reduction block, nine scaled `block8` units, one final
    `block8` without activation, and a 1x1 convolution to 1536 channels.
    (Author's note: this roughly corresponds to the layers after ROI pooling
    in the original paper.)

    Args:
        proposal_feature_maps: A tensor of shape
            [batch_size * self.max_num_proposals, crop_height, crop_width,
            depth] holding the feature-map crops for the individual
            proposals.
        scope: A scope name. NOTE(review): not referenced in this body.

    Returns:
        proposal_classifier_features: A tensor of shape
            [batch_size * self.max_num_proposals, height, width, depth]
            holding the features to be classified for each proposal.
    """
    with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights):
        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
                weight_decay=self._weight_decay)):
            # Batch norm runs in training mode only when
            # self._train_batch_norm is truthy (set outside this snippet).
            with slim.arg_scope([slim.batch_norm],
                                is_training=self._train_batch_norm):
                # Default every conv/pool below to stride 1 with SAME
                # padding; the reduction layers override both explicitly.
                with slim.arg_scope(
                        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
                    with tf.variable_scope('Mixed_7a'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(
                                proposal_feature_maps,
                                256, 1, scope='Conv2d_0a_1x1')
                            tower_conv_1 = slim.conv2d(
                                tower_conv, 384, 3, stride=2,
                                padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1 = slim.conv2d(
                                proposal_feature_maps, 256, 1,
                                scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(
                                tower_conv1, 288, 3, stride=2,
                                padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_2'):
                            tower_conv2 = slim.conv2d(
                                proposal_feature_maps, 256, 1,
                                scope='Conv2d_0a_1x1')
                            tower_conv2_1 = slim.conv2d(
                                tower_conv2, 288, 3,
                                scope='Conv2d_0b_3x3')
                            tower_conv2_2 = slim.conv2d(
                                tower_conv2_1, 320, 3, stride=2,
                                padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_3'):
                            tower_pool = slim.max_pool2d(
                                proposal_feature_maps, 3, stride=2,
                                padding='VALID', scope='MaxPool_1a_3x3')
                        # Join the four branches along the channel axis.
                        net = tf.concat(
                            [tower_conv_1, tower_conv1_1, tower_conv2_2,
                             tower_pool], 3)
                    net = slim.repeat(
                        net, 9, inception_resnet_v2.block8, scale=0.20)
                    net = inception_resnet_v2.block8(net, activation_fn=None)
                    proposal_classifier_features = slim.conv2d(
                        net, 1536, 1, scope='Conv2d_7b_1x1')
    return proposal_classifier_features
在這個方法的末尾使用一個1x1的卷積核進行卷積,生成1536通道的特征圖。
注意這里的輸入tensor的shape=[batch_size * self.max_num_proposals, crop_height, crop_width, depth],說明這里的輸入已經是經過區域提取網絡處理后的特征圖,每個批次有batch_size * self.max_num_proposals張特征圖。 輸出tensor是proposal_classifier_features:將用于框分類器的特征,shape=[batch_size * self.max_num_proposals, height, width, depth]
- restore_from_classification_checkpoint_fn()
def restore_from_classification_checkpoint_fn(
        self,
        first_stage_feature_extractor_scope,
        second_stage_feature_extractor_scope):
    """Build the name-to-variable map used to load a classification checkpoint.

    Walks `tf.global_variables()` and, for every variable whose op name
    starts with one of the two extractor scopes, derives the name to look up
    in the checkpoint by stripping the scope prefix.

    Args:
        first_stage_feature_extractor_scope: Scope name of the first-stage
            feature extractor.
        second_stage_feature_extractor_scope: Scope name of the second-stage
            feature extractor.

    Returns:
        A dict mapping the derived variable names to the corresponding
        variable objects.
    """
    variables_to_restore = {}
    for variable in tf.global_variables():
        if variable.op.name.startswith(
                first_stage_feature_extractor_scope):
            var_name = variable.op.name.replace(
                first_stage_feature_extractor_scope + '/', '')
            variables_to_restore[var_name] = variable
        if variable.op.name.startswith(
                second_stage_feature_extractor_scope):
            # The second-stage 'Repeat' scope is renamed to 'Repeat_2'.
            # NOTE(review): presumably this matches the scope name used in
            # the classification checkpoint - confirm against
            # nets/inception_resnet_v2.
            var_name = variable.op.name.replace(
                second_stage_feature_extractor_scope
                + '/InceptionResnetV2/Repeat', 'InceptionResnetV2/Repeat_2')
            var_name = var_name.replace(
                second_stage_feature_extractor_scope + '/', '')
            variables_to_restore[var_name] = variable
    return variables_to_restore
這個方法覆蓋了基類的方法,同時復用了_extract_box_classifier_features()方法構建的命名空間,這里所用到的參數權重均是_extract_box_classifier_features()構建的。
Args: first_stage_feature_extractor_scope: 第一階段的命名空間; second_stage_feature_extractor_scope: 第二階段的命名空間。 Returns: 返回了一個權重參數字典。
這個文件里面就是對特征的提取,所有提取結果的去處和用法在object_detectionAPI源碼閱讀筆記(6-faster_rcnn_meta_arch.py)中有提到。
FasterRCNNInceptionResnetV2FeatureExtractor是提供材料的一個類,怎么進行預測、檢測、預處理,在FasterRCNNMetaArch這個類實現了。