I am working on a medical image classification problem. The images are multiple slices of patients' brains, and the data has already been cleaned. I have 150 AD patients, 150 MCI (mild cognitive impairment) patients, and 150 NC (normal control) patients. Each patient has 96 DICOM files, i.e. 96 slices, and each slice is 160 × 160.
I am using the TensorFlow cifar10 code as my template, and to make it work I had to change its read_cifar10 part. I changed the code as described in the link below.
Attach a queue to a numpy array in tensorflow for data fetch instead of files?
First, I convert the data to binary files with my own Python module load_img_M.py. To reduce the amount of data, I only keep the middle slices, from 30 to 60.
import numpy as np
import dicom
import os
ad_path = '/home/zmz/Pictures/AD'
mci_path = '/home/zmz/Pictures/MCI'
nc_path = '/home/zmz/Pictures/NC'
data_path =['/home/zmz/Pictures/%s' %i for i in ['NC','MCI','AD']]
KDOWN = 29
KUP = 61
SLICESNUM = KUP - KDOWN + 1
RECORDBYTES = 160*160*SLICESNUM + 1
#load image from the directory and save to binary file
def img2binary():
    # use uint8 so that each pixel occupies exactly one byte, matching
    # RECORDBYTES above and the tf.uint8 decoding in cifar10_input
    train_arr = np.zeros([100, SLICESNUM*160*160 + 1], dtype=np.uint8)
    test_arr = np.zeros([50, SLICESNUM*160*160 + 1], dtype=np.uint8)
    for p in range(len(data_path)):
        Patientlist = os.listdir(data_path[p])
        for q in range(len(Patientlist)):
            Dicompath = os.path.join(data_path[p], Patientlist[q])
            Dicomlist = os.listdir(Dicompath)
            if q < 100:
                train_arr[q, 0] = p  # assign the label of the picture
            else:
                test_arr[q-100, 0] = p
            for k in range(len(Dicomlist)):
                if k > KDOWN and k < KUP:  # select the middle slices, which carry the most information
                    Picturepath = os.path.join(Dicompath, Dicomlist[k])
                    img = dicom.read_file(Picturepath)
                    #print(type(img.pixel_array))
                    imgpixel = img.pixel_array.reshape(25600)
                    if q < 100:
                        train_arr[q, (1+(k-KDOWN-1)*25600):(1+(k-KDOWN)*25600)] = imgpixel  # assign the pixels
                    else:
                        test_arr[q-100, (1+(k-KDOWN-1)*25600):(1+(k-KDOWN)*25600)] = imgpixel
        train_arr.tofile("/home/zmz/Pictures/tmp/images/train%s.bin" % p)
        test_arr.tofile("/home/zmz/Pictures/tmp/images/test%s.bin" % p)
The binary files then contain one record per patient: a single label byte followed by SLICESNUM * 160 * 160 pixel bytes. A small sketch for inspecting this layout follows.
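This is a minimal inspection sketch (not part of the original pipeline), assuming the output paths used above and that the files were written as uint8 as in img2binary():

import numpy as np
import load_img_M

# each record: 1 label byte + SLICESNUM*160*160 pixel bytes
records = np.fromfile("/home/zmz/Pictures/tmp/images/train0.bin", dtype=np.uint8)
records = records.reshape(-1, load_img_M.RECORDBYTES)
print(records.shape)   # expected: (100, 844801) for SLICESNUM = 33
print(records[0, 0])   # label of the first patient (0, 1 or 2)
volume = records[0, 1:].reshape(load_img_M.SLICESNUM, 160, 160)
print(volume.shape)    # (33, 160, 160)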
Next, I changed the cifar10_input module:
"""Routine for decoding the Alzeheimer dicom format"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import load_img_M
import os
import numpy as np
import dicom
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
# Process images of this size. Note that this differs from the original CIFAR
# image size of 32 x 32. If one alters this number, then the entire model
# architecture will change and any model would need to be retrained.
IMAGE_SIZE = 160
# Global constants describing the ADNI data set.
IMAGE_HEIGHT = 160
IMAGE_WIDTH = 160
IMAGE_CHANNEL = 1
SLICES_NUM = load_img_M.SLICESNUM
NUM_CLASSES = 3
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 300
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 150
#define a dicom reader to read a record
def read_ADNI(filename_queue):
  """Reads and parses examples from ADNI data files.

  Recommendation: if you want N-way read parallelism, call this function
  N times. This will give you N independent Readers reading different
  files & positions within those files, which will give better mixing of
  examples.

  Args:
    filename_queue: A queue of strings with the filenames to read from.

  Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (160)
      width: number of columns in the result (160)
      channels: number of color channels in the result (1)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label in the range 0, 1, 2.
      uint8image: a [slice, height, width, channels] uint8 Tensor with the image data
  """

  class ADNIRecord(object):
    pass  # an empty container class; its fields are filled in below
  result = ADNIRecord()

  label_bytes = 1
  result.height = IMAGE_HEIGHT
  result.width = IMAGE_WIDTH
  result.depth = IMAGE_CHANNEL
  result.slice = SLICES_NUM
  image_bytes = result.height * result.width * result.depth * result.slice
  record_bytes = label_bytes + image_bytes
  assert record_bytes == load_img_M.RECORDBYTES

  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  result.key, value = reader.read(filename_queue)

  # Convert from a string to a vector of uint8 that is record_bytes long.
  record_bytes = tf.decode_raw(value, tf.uint8)

  # The first byte represents the label, which we convert from uint8 -> int32.
  result.label = tf.cast(
      tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

  # The remaining bytes after the label represent the image, which we reshape
  # from [slice * height * width * channels] to [slice, height, width, channels].
  depth_major = tf.reshape(
      tf.strided_slice(record_bytes, [label_bytes],
                       [label_bytes + image_bytes]),
      [result.slice, result.height, result.width, result.depth])
  # No transpose is needed here because the volume is already slice-major.
  #result.uint8image = tf.transpose(depth_major, [1, 2, 0])
  result.uint8image = depth_major

  return result
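Before wiring this into the full pipeline, a minimal sanity-check sketch (my own addition, assuming the train0.bin file written above and that read_ADNI is importable) can confirm that one record is decoded with the expected shape:

import tensorflow as tf

# quick test of read_ADNI on a single file
filename_queue = tf.train.string_input_producer(
    ['/home/zmz/Pictures/tmp/images/train0.bin'])
record = read_ADNI(filename_queue)

with tf.Session() as sess:
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  image, label = sess.run([record.uint8image, record.label])
  print(image.shape, label)  # expected: (33, 160, 160, 1) [label]
  coord.request_stop()
  coord.join(threads)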
Finally, I changed distorted_inputs. I removed the blocks used for image preprocessing, such as cropping and flipping:
def distorted_inputs(data_dir, batch_size):
  """Construct distorted input for ADNI training using the Reader ops.

  Args:
    data_dir: Path to the ADNI data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 5D tensor of [batch_size, slices, IMAGE_SIZE, IMAGE_SIZE, 1] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  #filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
  #             for i in xrange(1, 6)]  # data_batch_1,2,3.bin
  filenames = [os.path.join(data_dir, 'tmp/images/train%s.bin' % i) for i in [0, 1, 2]]
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue,
  # calling the reader defined at the very beginning.
  read_input = read_ADNI(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  # Set the shapes of tensors.
  reshaped_image.set_shape([SLICES_NUM, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print('Filling queue with %d ADNI images before starting to train. '
        'This will take a few minutes.' % min_queue_examples)

  return _generate_image_and_label_batch(reshaped_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
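_generate_image_and_label_batch itself is not shown in my post; in the stock cifar10_input.py it is essentially a wrapper around tf.train.shuffle_batch, roughly like the sketch below (the tf.summary.image call from the template is dropped here because it expects 4D image batches, not the 5D volumes used in this pipeline):

def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
  """Construct a queued batch of volumes and labels (sketch)."""
  num_preprocess_threads = 16
  if shuffle:
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
  else:
    images, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size)
  return images, tf.reshape(label_batch, [batch_size])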
This is a 3D convolutional network problem. I use tf.nn.conv3d and made the changes needed for the code to run:
def inference(images):  # core code
  """Build the ADNI model.

  Args:
    images: Images returned from distorted_inputs() or inputs().

  Returns:
    Logits.
  """
  # We instantiate all variables using tf.get_variable() instead of
  # tf.Variable() in order to share variables across multiple GPU training runs.
  # If we only ran this model on a single GPU, we could simplify this function
  # by replacing all instances of tf.get_variable() with tf.Variable().
  #
  # conv1
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 3, 1, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv3d(images, kernel, [1, 1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv1)

  # pool1
  pool1 = tf.nn.max_pool3d(conv1, ksize=[1, 3, 3, 3, 1], strides=[1, 1, 2, 2, 1],
                           padding='SAME', name='pool1')
  # norm1
  #norm1 = tf.nn.lrn3d(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
  norm1 = pool1

  # conv2
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 3, 64, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv3d(norm1, kernel, [1, 1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv2)

  # norm2
  #norm2 = tf.nn.lrn3d(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
  norm2 = conv2
  # pool2
  pool2 = tf.nn.max_pool3d(norm2, ksize=[1, 3, 3, 3, 1],
                           strides=[1, 1, 2, 2, 1], padding='SAME', name='pool2')

  # local3
  with tf.variable_scope('local3') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    _activation_summary(local3)

  # local4
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
    _activation_summary(local4)

  # linear layer (WX + b)
  # We don't apply softmax here because
  # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
  # and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                          stddev=1/192.0, wd=0.0)
    biases = _variable_on_cpu('biases', [NUM_CLASSES],
                              tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
    _activation_summary(softmax_linear)

  return softmax_linear
I was worried that the machine could not handle such a large amount of data, so I first set the batch size to 1, hoping the code would at least run. But I still got the error in the title. I actually have a very good Linux workstation: a 12 GB Titan Xp GPU and 64 GB of RAM. However, I share it with classmates, so the resources allocated to my account may be limited. My Linux GPU parameters
It would be even better if you could do some calculations to show why the resources are exhausted.
Answer 0 (score: 0)
Even with a batch size of 1, the network is too large. The problem lies in the number of weights it requires, not in the batch size. Try removing a few layers of your net or reducing the resolution of the 3D images, and see where your limit is.
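To make that concrete, here is a rough back-of-the-envelope count based on the inference() code above (a sketch assuming SAME padding and the strides shown; exact values may differ slightly):

# Rough parameter/memory estimate for the model above (batch size 1).
slices, h, w = 33, 160, 160

conv1_w = 3*3*3*1*64           # 1,728 weights
conv2_w = 3*3*3*64*64          # 110,592 weights

# Both max_pool3d ops use strides [1, 1, 2, 2, 1], so only height/width shrink:
# 160 -> 80 -> 40, while the 33 slices are kept.
flat = slices * (h // 4) * (w // 4) * 64   # 33 * 40 * 40 * 64 = 3,379,200

local3_w = flat * 384          # weights of the first fully connected layer
print(local3_w)                # 1,297,612,800
print(local3_w * 4 / 1024**3)  # ~4.8 GiB just to store local3 in float32

The local3 layer alone needs about 1.3 billion weights, roughly 4.8 GiB in float32. Together with the gradient and the moving-average copy that the cifar10 template keeps for every trainable variable, that is on the order of three copies, around 14 GiB, which already exceeds a 12 GB Titan Xp regardless of batch size. Adding another pooling stage (or also pooling over the slice dimension) before the dense layer would shrink this dramatically.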