我想创建和训练一个现有的卷积神经网络。 我正在使用DICOM图像,并且能够很好地提取代码。我将它们转换为尺寸224 * 224,然后使用它。 我还创建了一些实用程序功能来帮助使我的代码更具模块化。
每当训练神经网络时,我都会出错。我认为这主要与网络的体系结构有关,尽管我找不到任何缺陷。
尽管我想坚持使用给定的网络,但我尝试了稍有不同的组合,因为我实际上是在实现现有网络。
这是主要网络:
cnn_1.py
##SETUP
import tensorflow as tf
from retriever import IMG_PX_SIZE, classLength
#ALL HELPER FUNCTIONS
#INIT WEIGHTS
def init_weights(shape):
init_rand_dist = tf.truncated_normal(shape, stddev=0.01)
return tf.Variable(init_rand_dist)
#INIT BIAS
def init_bias(shape):
init_bias_vals = tf.constant(0.1, shape=shape)
return tf.Variable(init_bias_vals)
def init_bias_2(shape, value):
init_bias_vals = tf.constant(value, shape=shape)
return tf.Variable(init_bias_vals)
#CONV2D
def conv2d(x, W, stridec=[1, 1, 1, 1]):
# x---> [batch, h, w, channels]
# W---> [filter_h, filter_w, channel_in, channel_out]
print(x.shape)
return tf.nn.conv2d(x, W, strides=stridec, padding='SAME')
#POOLING
#Fixed ksize and strides
def max_pool_3by3(x):
# x---> [batch, h, w, channels]
print(x.shape)
return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
#CONVOLUTIONAL LAYER
def convolutional_layer(input_x, shape, stridec=[1, 1, 1, 1]):
W = init_weights(shape)
b = init_bias_2([shape[3]], 0.0)
#must cast b to float32
return tf.nn.relu(conv2d(input_x, W, stridec=stridec)+b)
def convolutional_layer_2(input_x, shape, stridec=[1, 1, 1, 1]):
W = init_weights(shape)
return conv2d(input_x, W, stridec=stridec)
#NORMAL LAYER
def normal_full_layer(input_layer, size):
print(input_layer.shape)
input_size = int(input_layer.get_shape()[1])
W = init_weights([input_size, size])
b = init_bias_2([size], 1.0)
return tf.matmul(input_layer, W)+b
#PLACEHOLDER
#x = tf.placeholder(tf.float32, shape=[None, IMG_PX_SIZE*IMG_PX_SIZE])
y_true = tf.placeholder(tf.float32, shape=[None, classLength])
#LAYERS
x_image = tf.placeholder(tf.float32, shape=[None, IMG_PX_SIZE, IMG_PX_SIZE, 1])
#x_image = tf.reshape(x, [-1, IMG_PX_SIZE, IMG_PX_SIZE, 1])
#decoy = convolutional_layer(x_image, shape=[11, 11, 1, 64])
convo1 = convolutional_layer(x_image, shape=[11, 11, 1, 64], stridec=[1, 4, 4, 1])
convo1_pool = max_pool_3by3(convo1)
convo2 = convolutional_layer(convo1_pool, shape=[5, 5, 64, 192])
convo2_pool = max_pool_3by3(convo2)
convo3 = convolutional_layer(convo2_pool, shape=[5, 5, 192, 384])
convo4 = convolutional_layer(convo3, shape=[3, 3, 384, 256])
convo5 = convolutional_layer(convo4, shape=[3, 3, 256, 256])
convo5_pool = max_pool_3by3(convo5)
print('Convolutional layers end')
#the dimension 8*8*256 might be wrong because of insufficient details of strides and paddings of some layers
convo5_flat = tf.reshape(convo5_pool, [-1, 14*14*256])
full_layer_one = tf.nn.relu(normal_full_layer(convo5_flat, 4096))
#DROPOUT
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one, keep_prob=hold_prob)
full_layer_two = tf.nn.relu(normal_full_layer(full_one_dropout, 4096))
#DROPOUT 2
full_two_dropout = tf.nn.dropout(full_layer_two, keep_prob=hold_prob)
full_layer_three = tf.nn.relu(normal_full_layer(full_two_dropout, 4096))
y_pred = normal_full_layer(full_layer_three, 15)
#LOSS FUNCTION
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))
#OPTIMIZER
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
print('Setup Completed')
##THIS MARKS THE END OF THE SETUP
##DATASET GENERATION
from retriever import getSubFolders, getImageArray, datasetPath
import numpy as np
import matplotlib.pyplot as plt
subfolders = getSubFolders(datasetPath)
print('Subfolders saved')
images = getImageArray(datasetPath, subfolders, 10)
print('Dataset Generated')
##THIS MARKS THE END OF THE DATASET GENERATION
##TRAINING
from utils import nextImageBatch, nextImageRandomBatch
steps = 1
with tf.Session() as sess:
sess.run(init)
index = 0
for i in range(steps):
#10 images per class
length = len(images)
batch_x, batch_y, index = nextImageBatch(images, length, len(subfolders), index)
sess.run(train, feed_dict={x_image:batch_x, y_true:batch_y, hold_prob:0.5})
#print accuracy every few steps
if i%50==0:
print("ON STEP {}".format(i))
print("ACCURACY: ")
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
test_x, test_y, index = nextImageBatch(images, len(images)/(len(subfolders)*30), len(subfolders), index)
acc = tf.reduce_mean(tf.cast(matches, tf.float32))
print(sess.run(acc, feed_dict={x_image:test_x, y_true:test_y, hold_prob:1.0}))
print('\n')
这是我的函数,它从文件夹组中检索我的数据集,尽管我认为这不需要研究,因为我已经对其进行了测试,并且效果很好。
retriever.py
import pydicom as pydi
import dicom_numpy as dinum
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
from PIL import Image
datasetPath = '<my_folder>\\Dataset'
def getSubFolders(path):
subfolders = []
os.chdir(path)
for x in os.listdir('.'):
if not (x.endswith('.tcia')):
subfolders.append(x)
return subfolders
subfolders = getSubFolders(datasetPath)
#print(subfolders)
classLength = len(subfolders)
IMG_PX_SIZE = 224
def resize(img_dcm, IMG_PX_SIZE):
return cv2.resize(np.array(img_dcm.pixel_array), (IMG_PX_SIZE, IMG_PX_SIZE))
def getImageArray(path, subfolders, length):
#Following is the data structure that would store the images
images = []
#Instead of using each element of subfolders, we will use an index based on the length of the subfolders
#So that we can store the index value of the class (subfolders) instead of the class string value
for s in range(len(subfolders)):
Path = os.path.join(path, subfolders[s])
count = 0
print(s)
for root, dirs, files in os.walk(Path):
for file in files:
if file.endswith('.dcm') and count<length:
img = pydi.dcmread(os.path.join(root, file))
#print(type(img.pixel_array))
image = resize(img, IMG_PX_SIZE)
#print(image.shape)
count = count+1
images.append((s, image))
return images
这是我关于实用程序功能的文件。就像我想要的那样,它会返回(224,224,1)尺寸的图像。
import numpy as np
import math
import random
from retriever import IMG_PX_SIZE
#images: [(image_class, image_array), ......]
#This function returns an array that has equal number of images per class and not randomized
#length: total length of batch_x
def nextImageBatch(images, length=150, classes=15, index=0):
perClass = int(length/classes)
i = 0
classLength = int(len(images)/15)
#batch_x contains the images, batch_y contains the index of the associated class in the subfolders array
batchx, batchy = np.zeros((length, IMG_PX_SIZE, IMG_PX_SIZE, 1)), np.zeros((length, classes))
counter = 0
for cla in range(classes):
for i in range(perClass):
#Following is the index of the image to be appended
point = (cla*classLength) + (index*perClass) + i
#need to convert the 2d image to 3d
newimage = images[point][1]
batchx[counter] = newimage[:, :, np.newaxis]
batchy[counter][images[point][0]] = 1
counter = counter + 1
print('batch_x shape: {}'.format(batchx.shape))
print('batch_y shape: {}'.format(batchy.shape))
return batchx, batchy, index+1
这是我得到的输出,直到运行正常为止。 (我将分别打印错误)
(每层的输入大小)
(?, 224, 224, 1)
(?, 56, 56, 64)
(?, 28, 28, 64)
(?, 28, 28, 192)
(?, 14, 14, 192)
(?, 14, 14, 384)
(?, 14, 14, 256)
(?, 14, 14, 256)
Convolutional layers end
(?, 50176)
(?, 4096)
(?, 4096)
(?, 4096)
Setup Completed
Subfolders saved
(这些是用于分类的图像的类编号。不用担心)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
Dataset Generated
(批处理大小。每个大小为150张图像,大小为(224,224,1),属于15个类别之一) (batch_y [0]示例:[1。,0. 0. 0.,0. 0. 0.,0. 0. 0.,0. 0. 0.,0。]
batch_x shape: (150, 224, 224, 1)
batch_y shape: (150, 15)
错误(最后一行中的主要错误)
Traceback (most recent call last):
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1327, in _do_call
return fn(*args)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1306, in _run_fn
status, run_metadata)
File "D:\Anaconda3\envs\tfdeeplearning\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 1881600 values, but the requested shape requires a multiple of 50176
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool_2, Reshape/shape)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 391, in main
run()
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 272, in run_file
runpy.run_path(target, run_name='__main__')
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\python_projects\tensorflow\full-tensorflow-notes-and-data\tensorflow-bootcamp-master\Research\CBIR_CNN\cnn_1.py", line 152, in <module>
sess.run(train, feed_dict={x_image:batch_x, y_true:batch_y, hold_prob:0.5})
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1321, in _do_run
options, run_metadata)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 1881600 values, but the requested shape requires a multiple of 50176
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool_2, Reshape/shape)]]
Caused by op 'Reshape', defined at:
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 391, in main
run()
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 272, in run_file
runpy.run_path(target, run_name='__main__')
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\python_projects\tensorflow\full-tensorflow-notes-and-data\tensorflow-bootcamp-master\Research\CBIR_CNN\cnn_1.py", line 93, in <module>
convo5_flat = tf.reshape(convo5_pool, [-1, 14*14*256])
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 2619, in reshape
name=name)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 1881600 values, but the requested shape requires a multiple of 50176
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool_2, Reshape/shape)]]
请比其余代码更专心地研究网络体系结构。我认为主要的问题在那里。
有什么办法可以在运行时通过图层查看图像的大小转换。这将有更多帮助。