How to create the layer0 input for input images with 3 channels

Time: 2014-12-04 06:58:44

Tags: theano

Hello. I am following the code at http://deeplearning.net/tutorial/code/convolutional_mlp.py to implement a convolutional neural network. My input images are ones where the channels matter, so I would like to use a 3-channel feature map as the layer 0 input.

So I need something like

layer0_input = x.reshape((batch_size, 3, 135, 240)) # 3 channels, height 135, width 240

instead of

layer0_input = x.reshape((batch_size, 1, 28, 28)) # 28*28 normalized MNIST gray scale images

which would then be used here:

layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 5, 5),  # the channel dimension must match the 3 input channels
    poolsize=(2, 2)
)

where x is provided to theano in
train_model = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
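
Each value of index then selects one mini-batch of the shared training data; roughly as the tutorial does it, the number of batches comes from the first dimension of train_set_x:

# the row count of train_set_x fixes how many mini-batches index can range over
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size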

So, my question is: how should I create (shape) train_set_x?

In the grayscale case (single-channel intensities), train_set_x was created as

    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=True)

where data_x is a flattened numpy array holding 784 values (28 * 28 pixels) per image.
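
Presumably each row of data_x would then need 3 * 135 * 240 values, one channel after another, so that the reshape above recovers the (channel, height, width) layout. A minimal sketch of that assumption:

# Assumption, not confirmed by the tutorial: one flattened 3-channel image
# per row, channels laid out back to back, so that
# x.reshape((batch_size, 3, 135, 240)) undoes this layout.
import numpy
import theano

n_samples = 100                                    # hypothetical sample count
data_x = numpy.zeros((n_samples, 3 * 135 * 240))   # fill with real pixel data

shared_x = theano.shared(numpy.asarray(data_x,
                                       dtype=theano.config.floatX),
                         borrow=True)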

Many thanks for any advice.

1 Answer:

Answer 0 (score: 5)

I was able to get this working. I am pasting some code below that may help someone. It is not very elegant, but it works.

import os
import random
import cPickle

import cv2
import numpy as np

def shuffle_in_unison(a, b):
    # courtesy http://stackoverflow.com/users/190280/josh-bleecher-snyder
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b
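
For example (a hypothetical toy call), rows of X and entries of y stay paired after the shuffle:

X = np.arange(10).reshape(5, 2)          # 5 samples, 2 features each
y = np.array([0, 1, 2, 3, 4])            # label i belongs to row i
X_s, y_s = shuffle_in_unison(X, y)
assert all(X_s[i, 0] // 2 == y_s[i] for i in range(5))  # pairs survived the shuffle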

def createDataSet(imagefolder):
    os.chdir(imagefolder)

    # total number of files
    number_of_files = len([item for item in os.listdir('.')
                           if os.path.isfile(os.path.join('.', item))])

    # get a shuffled index list: I needed this because my image names were of
    # the format n_x_<some details>.jpg, where n was my target and x was a
    # number from 0 to m-1, m being the number of samples with target value n.
    # So I needed to shuffle and iterate while putting images into the train,
    # test and validate arrays.
    image_index_array = range(0, number_of_files)
    random.seed(12)
    random.shuffle(image_index_array)

    # split 80/10/10 - train/test/val
    trainsize = int(number_of_files * .8)
    testsize = int(number_of_files * .1)
    valsize = number_of_files - trainsize - testsize

    # create the index arrays of train/test/val by slicing the shuffled
    # image index array
    train_index_array = image_index_array[0:trainsize]
    test_index_array = image_index_array[trainsize:trainsize + testsize]
    validate_index_array = image_index_array[trainsize + testsize:]

    # initialize the data structures
    dataset = {'train': [[], []], 'test': [[], []], 'validate': [[], []]}

    i_counter = 0
    train_X = []
    train_y = []

    test_X = []
    test_y = []

    val_X = []
    val_y = []

    for item in os.listdir('.'):
        if not os.path.isfile(os.path.join('.', item)):
            continue

        if item.endswith('.pkl'):
            continue

        print 'Processing item ' + item
        # cast the label to int so it can later serve as a class index
        item_y = int(item.split('_')[0])
        item_x = cv2.imread(item)

        height, width = item_x.shape[:2]

        # this was my requirement - skip it if you do not need it
        if height != 135 or width != 240:
            continue

        # get the 3 channels
        b, g, r = cv2.split(item_x)

        item_x = [b, g, r]
        item_x = np.array(item_x)
        item_x = item_x.reshape(3, 135 * 240)

        if i_counter in test_index_array:
            test_X.append(item_x)
            test_y.append(item_y)
        elif i_counter in validate_index_array:
            val_X.append(item_x)
            val_y.append(item_y)
        else:
            train_X.append(item_x)
            train_y.append(item_y)

        i_counter = i_counter + 1

    # fix the dimensions: flatten out the channel and intensity dimensions
    train_X = np.array(train_X)
    train_X = train_X.reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2])
    test_X = np.array(test_X)
    test_X = test_X.reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2])
    val_X = np.array(val_X)
    val_X = val_X.reshape(val_X.shape[0], val_X.shape[1] * val_X.shape[2])

    train_y = np.array(train_y)
    test_y = np.array(test_y)
    val_y = np.array(val_y)

    # shuffle the train and test arrays in unison
    train_X, train_y = shuffle_in_unison(train_X, train_y)
    test_X, test_y = shuffle_in_unison(test_X, test_y)

    # pickle them
    dataset['train'] = [train_X, train_y]
    dataset['test'] = [test_X, test_y]
    dataset['validate'] = [val_X, val_y]
    output = open('pcount.pkl', 'wb')
    cPickle.dump(dataset, output)
    output.close()
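
To sanity-check the resulting pickle (the folder path below is just a placeholder):

createDataSet('/path/to/my/images')    # writes pcount.pkl into that folder

f = open('pcount.pkl', 'rb')
dump = cPickle.load(f)
f.close()
train_X, train_y = dump['train']
print train_X.shape    # expected (n_train, 3 * 135 * 240) = (n_train, 97200)
print train_y.shape    # expected (n_train,)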

Once you have this pickle file, you can use it in convolutional_mlp.py:

layer0_input = x.reshape((batch_size, 3, 135, 240))

# Construct the first convolutional pooling layer:
# filtering reduces the image size to (135-8+1 , 240-5+1) = (128, 236)
# maxpooling reduces this further to (128/2, 236/2) = (64, 118)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 64, 118)
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 8, 5),
    poolsize=(2, 2)
)
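
The layers after that follow the tutorial pattern unchanged; for example, a second layer would take nkerns[0] feature maps of size (64, 118) as its input channels. The 5x5 filter size below is my own illustration, not something fixed by the answer:

# filtering reduces the image size to (64-5+1, 118-5+1) = (60, 114)
# maxpooling reduces this further to (60/2, 114/2) = (30, 57)
layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], 64, 118),
    filter_shape=(nkerns[1], nkerns[0], 5, 5),
    poolsize=(2, 2)
)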

The load_data function in logistic_sgd.py needs a small change, as shown below:

f = open(dataset, 'rb')
dump = cPickle.load(f)
train_set = dump['train']
valid_set = dump['validate']
test_set = dump['test']
f.close()
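
After that, the rest of load_data can stay as it is in the tutorial, with the three splits wrapped into shared variables by the existing shared_dataset helper:

# unchanged tutorial code: wrap each split in theano shared variables
test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
train_set_x, train_set_y = shared_dataset(train_set)

rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
        (test_set_x, test_set_y)]
return rval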

Hope this helps.