提供的SSD MobileNet 1 PPN配置非常好,但是我很好奇能否将“骨干网”替换为MobileNet 2。
我在某种程度上已经做到了这一点,并且与相应的 PPN V1 配置相比,我的 PPN V2 配置可以很好地训练(也可以平滑收敛)(即1万步[根据我自己的数据])。
我尝试了各种端点,并在Tensorboard中查看了生成的图形,特别是查看了最后一个MNet2层和第一个PPN MaxPool层之间的连接形状。
对于图像尺寸为512x288的现有PPN MNet1,最后一层与第一个PPN MaxPool层之间的连接形状对我来说是合理的(例如,下面的文件中硬编码了320这个值)。
我选择的其他MNet2端点生成了可行的图形,但冻结时的文件大小却大不相同。 PPN MaxPool层是根据最后一个MNet2层的形状构造的,因此我从FPN示例中复制了一种方法,该方法添加了最后的卷积来进行处理。
我想了解以下内容:在 _create_modified_mobilenet_config 中,num_outputs 应该如何选择(它是否应与 ssd.box_predictor.depth 匹配)?我从SSD PPN和FPN示例中获取了代码。
我已经展示了整个特征提取器文件,因为它不大,并且这是唯一需要的重要代码:
import tensorflow as tf
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils
import copy
import functools
from nets.mobilenet import mobilenet
from nets.mobilenet import mobilenet_v2
slim = tf.contrib.slim
def _create_modified_mobilenet_config( num_outputs=320, use_depthwise=True ):
    """Return MobileNet v2 conv_defs with the final op replaced.

    Adapted from the SSD MobileNet v2 FPN feature extractor: the last
    backbone op in ``mobilenet_v2.V2_DEF`` is swapped for a stride-1 1x1
    convolution producing ``num_outputs`` channels, so the PPN head sees a
    fixed, configurable feature depth regardless of depth_multiplier.

    NOTE(review): the default of 320 matches the original hard-coded value;
    whether it should instead track ssd.box_predictor.depth is an open
    question from the original author — confirm against the pipeline config.

    Args:
      num_outputs: channel depth of the replacement final layer.
      use_depthwise: if True (default, preserving previous behavior), the
        replacement op is a depthwise-separable conv; otherwise a regular
        conv (the variant previously left as commented-out code).

    Returns:
      A deep-copied, modified ``mobilenet_v2.V2_DEF`` conv_defs dict; the
      module-level V2_DEF is never mutated.
    """
    if use_depthwise:
        conv_op = functools.partial( slim.separable_conv2d, depth_multiplier=1 )
    else:
        conv_op = slim.conv2d
    conv_defs = copy.deepcopy( mobilenet_v2.V2_DEF )
    conv_defs['spec'][-1] = mobilenet.op(
        conv_op, stride=1, kernel_size=[1, 1], num_outputs=num_outputs )
    return conv_defs
class SSDMobileNetV2PpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    """SSD feature extractor: MobileNet v2 backbone + PPN feature-map head.

    Combines a MobileNet v2 base network (truncated at 'layer_19') with a
    Pooling Pyramid Network (max-pool based) multi-scale head, following the
    structure of the stock SSD MobileNet v1 PPN and MobileNet v2 FPN
    extractors in the TF Object Detection API.
    """
    def __init__(
        self,
        is_training,
        depth_multiplier,
        min_depth,
        pad_to_multiple,
        conv_hyperparams_fn,
        reuse_weights=None,
        use_explicit_padding=False,
        use_depthwise=False,
        override_base_feature_extractor_hyperparams=False
    ):
        """MobileNet v2 PPN feature extractor.

        Args:
          is_training: whether the network is being built for training.
          depth_multiplier: float multiplier for the backbone layer depths
            (number of channels).
          min_depth: minimum channel depth for all backbone layers.
          pad_to_multiple: integer; input height/width are zero-padded up to
            a multiple of this value before feature extraction.
          conv_hyperparams_fn: callable returning a slim arg_scope with the
            convolution hyperparameters for the PPN layers.
          reuse_weights: passed as ``reuse`` to the 'MobilenetV2'
            variable_scope.
          use_explicit_padding: use explicit padding in the backbone instead
            of SAME padding.
          use_depthwise: if True, the final backbone op is replaced via
            _create_modified_mobilenet_config (separable 1x1 conv).
          override_base_feature_extractor_hyperparams: also apply
            conv_hyperparams_fn to the backbone, not just the PPN head.
        """
        super( SSDMobileNetV2PpnFeatureExtractor, self ).__init__(
            is_training=is_training,
            depth_multiplier=depth_multiplier,
            min_depth=min_depth,
            pad_to_multiple=pad_to_multiple,
            conv_hyperparams_fn=conv_hyperparams_fn,
            reuse_weights=reuse_weights,
            use_explicit_padding=use_explicit_padding,
            use_depthwise=use_depthwise,
            override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
        # None means "use the stock V2_DEF"; only build modified conv_defs
        # when depthwise mode is requested (mirrors the FPN extractor).
        self._conv_defs = None
        if self._use_depthwise:
            self._conv_defs = _create_modified_mobilenet_config()
    def preprocess(self, resized_inputs):
        """Map pixel values from [0, 255] to [-1, 1] (MobileNet convention)."""
        return (2.0 / 255.0) * resized_inputs - 1.0
    def extract_features(self, preprocessed_inputs):
        """Extract PPN feature maps from preprocessed inputs.

        Runs the MobileNet v2 base up to 'layer_19', then feeds that single
        endpoint into pooling_pyramid_feature_maps to build 6 pyramid levels.

        Args:
          preprocessed_inputs: float tensor of preprocessed images; must be
            at least 33x33 spatially (checked below).

        Returns:
          The feature-map tensors produced by the PPN head (dict values).
        """
        preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs )
        # Truncate the backbone at layer_19 and use that same endpoint as the
        # base of the pooling pyramid.
        final_endpoint = 'layer_19'
        feature_endpoint = 'layer_19'
        with tf.variable_scope( 'MobilenetV2', reuse=self._reuse_weights ) as scope:
            # NB: checked all other MNet feature extractors have: is_training=None
            with slim.arg_scope( mobilenet_v2.training_scope( is_training=None, bn_decay=0.9997 ) ), \
                slim.arg_scope( [ mobilenet.depth_multiplier ], min_depth=self._min_depth ):
                # Only wrap the backbone in conv_hyperparams_fn when the
                # override flag is set; otherwise use a no-op context.
                with (
                    slim.arg_scope( self._conv_hyperparams_fn() )
                    if self._override_base_feature_extractor_hyperparams
                    else context_manager.IdentityContextManager()
                ):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple( preprocessed_inputs, self._pad_to_multiple ),
                        final_endpoint=final_endpoint,
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope
                    )
            # The PPN head always uses the configured conv hyperparameters.
            with slim.arg_scope( self._conv_hyperparams_fn() ):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=0,
                    num_layers=6,
                    image_features={
                        'image_features': image_features[ feature_endpoint ]
                    }
                )
        return feature_maps.values()