我正在尝试制作姿势估计模型,因此正在编写自己的损失函数,因为我没有找到任何好的解决方案,并且我想学习新的东西。
我认为最好的选择是使用自动编码器网络生成热图。对于我的标签,我使用从关键点生成的热图,作为输出,我使用具有张量的张量,形状为(1,640,640,16)16个热图,每个热图用于一个关键点。
我不知道训练这样的网络的最佳方法是什么(甚至不知道架构是否正确),但是对于损失函数,我唯一能想到的就是互相关。
代码
from tensorflow import keras
import cv2
from keras.models import Model
from keras.callbacks import TensorBoard
from keras.models import load_model
import math
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
from keras.models import Sequential
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D,UpSampling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop,Adam,SGD,Adagrad,Adadelta,Adamax,Nadam
from keras.applications import xception
import tensorflow.keras.backend as K
input_img = Input(shape=(640,640,1))
x1 = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
m1 = MaxPooling2D((2, 2), padding='same')(x1)
x2 = Conv2D(32, (3, 3), activation='relu', padding='same')(m1)
m2 = MaxPooling2D((2, 2), padding='same')(x2)
x3 = Conv2D(32, (3, 3), activation='relu', padding='same')(m2)
m3 = MaxPooling2D((2, 2), padding='same')(x3)
x4 = Conv2D(32, (3, 3), activation='relu', padding='same')(m3)
m4 = MaxPooling2D((2, 2), padding='same')(x4)
x5 = Conv2D(32, (3, 3), activation='relu', padding='same')(m4)
encoded = MaxPooling2D((2, 2), padding='same')(x5)
y00 = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
u00 = UpSampling2D((2, 2))(y00)
y0 = Conv2D(32, (3, 3), activation='relu', padding='same')(u00)
u0 = UpSampling2D((2, 2))(y0)
x4 = Conv2D(32, (3, 3), activation='relu', padding='same')(x4)
w = Add()([u0,x4])
y1 = Conv2D(32, (3, 3), activation='relu', padding='same')(w)
u1 = UpSampling2D((2, 2))(y1)
x3 = Conv2D(32, (3, 3), activation='relu', padding='same')(x3)
w = Add()([u1,x3])
y2 = Conv2D(32, (3, 3), activation='relu', padding='same')(w)
u2 = UpSampling2D((2, 2))(y2)
x2 = Conv2D(32, (3, 3), activation='relu', padding='same')(x2)
w = Add()([u2,x2])
y3 = Conv2D(32, (3, 3), activation='relu', padding='same')(w)
u3 = UpSampling2D((2, 2))(y3)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same')(x1)
w = Add()([u3,x1])
decoded = Conv2D(16, (3, 3), activation='sigmoid',padding='same')(w)
autoencoder = Model(input_img, decoded)
images = np.random.rand(10, 640, 640)
labels = []
points = np.random.randint(0, 640, (10, 16, 640, 640))
def make_dataset_gaussian(size, fwhm, center):
x = np.arange(0, size, 1, float)
y = x[:,np.newaxis]
x0 = center[0]
y0 = center[1]
return np.exp(-4*np.log(2) * ((x-x0)**2 + (y-y0)**2) / fwhm**2)
for label in points:
tmp_label = []
for keypoint in label:
tmp_label.append(make_dataset_gaussian(640, 15, [keypoint[0], keypoint[1]]))
labels.append(np.array(tmp_label).reshape((1, 640, 640, 16)))
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
def argmax_2d(tensor):
flat_tensor = tf.reshape(tensor, (tf.shape(tensor)[0] ** 2,))
argmax = tf.cast(tf.argmax(flat_tensor), tf.int32)
argmax_x = argmax // tf.shape(tensor)[0]
argmax_y = argmax % tf.shape(tensor)[0]
return tf.stack((argmax_x, argmax_y))
def cross_correlation(a, b):
a = tf.cast(a, tf.complex64)
b = tf.cast(b, tf.complex64)
fft_x = tf.signal.fft2d(a)
fft_y = tf.signal.fft2d(b)
return tf.math.real(tf.signal.ifft2d(fft_x*fft_y))
def my_loss(x_true, y_predicted):
x_true = tf.reshape(x_true, (1, 16, 640, 640))
y_predicted = tf.reshape(y_predicted, (1, 16, 640, 640))
error = tf.cast(0, tf.float64)
for y in range(len(x_true)):
for x in range(len(x_true[y])):
cross = cross_correlation(x_true[y][x], y_predicted[y][x])
position = argmax_2d(cross)
error += tf.cast(position[0] + position[1], tf.float64)
return error
autoencoder.summary()
autoencoder.compile(optimizer='adam',
loss=my_loss,
metrics=['accuracy'])
autoencoder.fit(dataset.batch(1), epochs=10, batch_size=1)
错误
ValueError: in user code:
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step **
outputs = model.train_step(data)
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:756 train_step
_minimize(self.distribute_strategy, tape, self.optimizer, loss,
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:2736 _minimize
gradients = optimizer._aggregate_gradients(zip(gradients, # pylint: disable=protected-access
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:562 _aggregate_gradients
filtered_grads_and_vars = _filter_grads(grads_and_vars)
d:\human-pose-model\venv\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:1270 _filter_grads
raise ValueError("No gradients provided for any variable: %s." %
ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'conv2d_3/kernel:0', 'conv2d_3/bias:0', 'conv2d_4/kernel:0', 'conv2d_4/bias:0', 'conv2d_5/kernel:0', 'conv2d_5/bias:0', 'conv2d_6/kernel:0', 'conv2d_6/bias:0', 'conv2d_7/kernel:0', 'conv2d_7/bias:0', 'conv2d_8/kernel:0', 'conv2d_8/bias:0', 'conv2d_9/kernel:0', 'conv2d_9/bias:0', 'conv2d_10/kernel:0', 'conv2d_10/bias:0', 'conv2d_11/kernel:0', 'conv2d_11/bias:0', 'conv2d_12/kernel:0', 'conv2d_12/bias:0', 'conv2d_13/kernel:0', 'conv2d_13/bias:0', 'conv2d_14/kernel:0', 'conv2d_14/bias:0'].
我非常确定错误是由tf.argmax()
函数引起的,但是我找不到其他任何用于计算互相关损失的解决方案。
我正在使用Python 3.8.2和Tensorflow 2.3.0