
时间:2018-11-30 13:32:37

标签: tensorflow machine-learning keras deep-learning object-detection



  1. 将图像画布的尺寸从28x28增加到100x100
  2. 将手写数字移动到100x100图片中的随机位置
  3. 创建地面真相边界框


Step 1 and 2: Increasing image canvas size and moving digit to random position


Step 3: Ground truth bounding boxes


y = [p, x, y, w, h, c0, ..., c9]

其中p =对象的概率,(x,y,w,h)=边界框中心,宽度和高度占图像大小的一部分,c0-c9 =类概率(每个数字一位)。


但是,当我尝试训练模型时,出现了神秘的错误tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [15] vs. [200],其中[15]是我最后一层中的节点数,而[200]是我指定的批次大小训练(我通过更改值并再次运行来验证这一点)。它们不可能合理地相同,所以我想我在模型中的张量维方面错过了一些至关重要的事情,但是我无法弄清楚是什么。




import numpy as np

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras import backend as K

def increase_image_size(im_set, new_size):
    num_images = im_set.shape[0]
    orig_size = im_set[0].shape[0]
    im_stack = np.zeros((num_images, new_size, new_size), dtype='uint8')

    # Put MNIST digits at random positions in new images
    for i in range(num_images):
        x0 = int(np.random.random() * (new_size - orig_size - 1))
        y0 = int(np.random.random() * (new_size - orig_size - 1))
        x1 = x0 + orig_size
        y1 = y0 + orig_size

        im_stack[i, y0:y1, x0:x1] = im_set[i]

    return im_stack

# Get bounding box annotations from images and object labels
def get_image_annotations(X_train, y_train):
    num_images = len(X_train)
    annotations = np.zeros((num_images, 15), dtype='float')
    for i in range(num_images):
        annotations[i] = get_image_annotation(X_train[i], y_train[i])
    return annotations

def get_image_annotation(X, y):
    sz_y, sz_x = X.shape

    y_indices, x_indices = np.where(X > 0)

    y_min = max(np.min(y_indices) - 1, 0)
    y_max = min(np.max(y_indices) + 1, sz_y)
    x_min = max(np.min(x_indices) - 1, 0)
    x_max = min(np.max(x_indices) + 1, sz_x)

    bb_x = (x_min + x_max) / 2.0 / sz_x
    bb_y = (y_min + y_max) / 2.0 / sz_y

    bb_w = (x_max - x_min) / sz_x
    bb_h = (y_max - y_min) / sz_y

    classes = np.zeros(10, dtype='float')
    classes[y] = 1

    output = np.concatenate(([1, bb_x, bb_y, bb_w, bb_h], classes))
    return output

def custom_cost_function(y_true, y_pred):
    p_p = y_pred[0]
    x_p = y_pred[1]
    y_p = y_pred[2]
    w_p = y_pred[3]
    h_p = y_pred[4]

    p_t = y_true[0]
    x_t = y_true[1]
    y_t = y_true[2]
    w_t = y_true[3]
    h_t = y_true[4]

    c_pred = y_pred[5:]
    c_true = y_true[5:]

    c1 = K.sum((c_pred - c_true) * (c_pred - c_true))
    c2 = (x_p - x_t) * (x_p - x_t) + (y_p - y_t) * (y_p - y_t) \
         + (K.sqrt(w_p) - K.sqrt(w_t)) * (K.sqrt(w_p) - K.sqrt(w_t)) \
         + (K.sqrt(h_p) - K.sqrt(h_t)) * (K.sqrt(h_p) - K.sqrt(h_t))

    lambda_class = 1.0
    lambda_coord = 1.0

    return lambda_class * c1 + lambda_coord * c2

def baseline_model():
    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=(1, 100, 100), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(15, activation='linear'))
    # Compile model
    model.compile(loss=custom_cost_function, optimizer='adam', metrics=['accuracy'])
    return model

def mnist_object_detection():

    # fix random seed for reproducibility

    # Load data
    print("Loading data")
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Adjust input images
    print("Adjust input images (increasing image sizes and moving digits)")
    X_train = increase_image_size(X_train, 100)
    X_test = increase_image_size(X_test, 100)

    print("Creating annotations")
    y_train_prim = get_image_annotations(X_train, y_train)
    y_test_prim = get_image_annotations(X_test, y_test)

    # reshape to be [samples][pixels][width][height]
    X_train = X_train.reshape(X_train.shape[0], 1, 100, 100).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], 1, 100, 100).astype('float32')

    # normalize inputs from 0-255 to 0-1
    X_train = X_train / 255
    X_test = X_test / 255

    # build the model
    print("Building model")
    model = baseline_model()
    # Fit the model
    print("Training model")
    model.fit(X_train, y_train_prim, validation_data=(X_test, y_test_prim), epochs=10, batch_size=200, verbose=1)

if __name__ == '__main__':


/Users/gedda/anaconda3/envs/keras-obj-det/bin/pythonn /Users/gedda/devel/tensorflow/digit-recognition/object_detection_reduced.py
Using TensorFlow backend.
Loading data
Adjust input images (increasing image sizes and moving digits)
Creating annotations
Building model
2018-11-30 13:26:34.030159: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
2018-11-30 13:26:34.030463: I tensorflow/core/common_runtime/process_util.cc:69] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.
Training model
Train on 60000 samples, validate on 10000 samples
Epoch 1/3
Traceback (most recent call last):
  File "/Users/gedda/devel/tensorflow/digit-recognition/object_detection_reduced.py", line 140, in <module>
  File "/Users/gedda/devel/tensorflow/digit-recognition/object_detection_reduced.py", line 136, in mnist_object_detection
    model.fit(X_train, y_train_prim, validation_data=(X_test, y_test_prim), epochs=3, batch_size=200, verbose=1)
  File "/Users/gedda/anaconda3/envs/keras-obj-det/lib/python3.6/site-packages/keras/engine/training.py", line 1039, in fit
  File "/Users/gedda/anaconda3/envs/keras-obj-det/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 199, in fit_loop
outs = f(ins_batch)
  File "/Users/gedda/anaconda3/envs/keras-obj-det/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
return self._call(inputs)
  File "/Users/gedda/anaconda3/envs/keras-obj-det/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2675, in _call
fetched = self._callable_fn(*array_vals)
  File "/Users/gedda/anaconda3/envs/keras-obj-det/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1439, in __call__
  File "/Users/gedda/anaconda3/envs/keras-obj-det/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [15] vs. [200]
     [[{{node training/Adam/gradients/loss/dense_2_loss/mul_7_grad/BroadcastGradientArgs}} = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/Adam/gradients/loss/dense_2_loss/mul_7_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/Adam/gradients/loss/dense_2_loss/mul_7_grad/Shape, training/Adam/gradients/loss/dense_2_loss/mul_7_grad/Shape_1)]]

Process finished with exit code 1

1 个答案:

答案 0 :(得分:2)



def custom_cost_function(y_true, y_pred):
    p_p = y_pred[:,0]
    x_p = y_pred[:,1]
    y_p = y_pred[:,2]
    w_p = y_pred[:,3]
    h_p = y_pred[:,4]

    p_t = y_true[:,0]
    x_t = y_true[:,1]
    y_t = y_true[:,2]
    w_t = y_true[:,3]
    h_t = y_true[:,4]

    c_pred = y_pred[:,5:]
    c_true = y_true[:,5:]
