我有以下用于分割灰度PNG图像的U-Net。
import cv2
import os
from sklearn.utils import shuffle
import tensorflow as tf
import numpy as np
OVERALLSIZE = int(float(input('Choose the number of images you want (<5635) : ')))
PATH = input('give absolute path to image')
TESTSIZE = int(float(input('Choose the number of the data you want to use as test (<5635) : ')))
######################################################################################################################
images = [img for img in os.listdir(PATH + '/Xtrain') if img.endswith('png')]
# put random_state to 1
images = shuffle(images,random_state = 0)
masks = [name[:-4]+'_mask.png' for name in images]
images, masks = images[:OVERALLSIZE], masks[:OVERALLSIZE]
images_, masks_ = [cv2.imread(PATH + '/Xtrain/' + img, cv2.IMREAD_GRAYSCALE).astype(np.int) for img in images], \
[cv2.imread(PATH + '/ytrain/' + msk, cv2.IMREAD_GRAYSCALE).astype(np.int) for msk in masks]
######################################################################################################################
X_train, y_train, X_test, y_test = np.asarray(images_[TESTSIZE:])/255., \
np.asarray(masks_[TESTSIZE:]), \
np.asarray(images_[:TESTSIZE])/255., \
np.asarray(masks_[:TESTSIZE])
x = tf.placeholder(tf.float32, shape=[None, 420, 580], name='x')
y_ = tf.placeholder(tf.float32, shape=[None, 420, 580], name='y_')
sess = tf.InteractiveSession()
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev = 0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def convoer(inputs, shape, flag):
W = weight_variable(shape)
b = bias_variable([shape[3]])
temp = shape
temp[2] = shape[3]
Wa = weight_variable(temp)
ba = bias_variable([shape[3]])
conv = tf.nn.relu(conv2d(inputs, W) + b)
conv = tf.nn.relu(conv2d(conv, Wa) + ba)
pool = max_pool_2x2(conv)
if flag: return pool
elif not flag: return conv
def upconvoer(inputs, shape, height, width):
W = weight_variable(shape)
b = bias_variable([shape[3]])
temp = shape
temp[2] = shape[3]
Wa = weight_variable(temp)
ba = bias_variable([shape[3]])
up = tf.image.resize_images(inputs, height, width)
conv = tf.nn.relu(conv2d(up, W) + b)
conv = tf.nn.relu(conv2d(conv, Wa) + ba)
return conv
def conv2d(x, W):
return tf.nn.conv2d(x,W,strides = [1,1,1,1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize = [1,2,2,1], strides = [1,2,2,1], padding='SAME')
def U():
inputs = tf.reshape(x, [-1,420,580,1])
pool1 = convoer(inputs, [3,3,1,32], True)
pool2 = convoer(pool1, [3,3,32,64], True)
pool3 = convoer(pool2, [3,3,64,128], True)
pool4 = convoer(pool3, [3,3,128,256], True)
conv5 = convoer(pool4, [3,3,256,512], False)
conv6 = upconvoer(conv5, [3,3,512,256], 73, 53)
conv7 = upconvoer(conv6, [3,3,256,128], 145, 105)
conv8 = upconvoer(conv7, [3,3,128,64], 290, 210)
conv9 = upconvoer(conv8, [3,3,64,32], 420, 580)
W10 = weight_variable([1,1,32,1])
b10 = bias_variable([1])
conv10 = tf.nn.sigmoid(conv2d(conv9, W10) + b10)
y = conv10
return y
y = U()
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess.run(train_step, feed_dict={x: X_train, y_: y_train})
在交叉熵步骤我尝试将3d Tensor y_ [batch,grayscale_in_height,grayscale_in_width]和4d张量[批次,高度,宽度,通道]相乘。
我收到以下错误:
ValueError: Incompatible shapes for broadcasting: (?, 420, 580) and (?, 420, 580, 1)
我试图在3个不同的位置重塑y:在 U 函数中,在它定义的最开始和cross_entropy中,但没有一个有效。