我在这里有点受阻。 我对张力流不是新手,即使我没有用它做很多事情。 但这是我的问题:
我想对7个不同类别的文本进行分类,为此我使用tfidf方法将字符串转换为数字。 在那里,我将稀疏矩阵转换为密集矩阵,这给了我一个2D矩阵形状(179,482)。 我将它分成150个障碍训练组和29个测试组。 然后,我按照以下方式设置了我的模型:
'''
-------------------------------------------------------------------------
Convolution 2D avec RELU
-------------------------------------------------------------------------
'''
def conv1d(x, W, b, name_conv, name_bias, name_relu, strides=1):
tensor = tf.nn.conv1d(x, W, strides, padding="SAME", name=name_conv)
tensor = tf.nn.bias_add(tensor,b, name=name_bias)
return tf.nn.relu(tensor, name=name_relu)
'''
-------------------------------------------------------------------------
Pooling max
-------------------------------------------------------------------------
'''
def maxpool2d(x, name, k=2):
return tf.nn.max_pool(x, [1, k, k, 1], [1, k, k, 1], padding="SAME", name=name)
'''
-------------------------------------------------------------------------
Modele du reseau convolutif :
CONV1-RELU-CONV2-RELU-FCL-Prediction
-------------------------------------------------------------------------
'''
def conv_net(x, poids, biais):
# Mise en forme de l'image d'entree
x = tf.reshape(x, shape=[-1, 482, 1])
conv1 = conv1d(x, poids['wconv1'], biais['bconv1'], name_conv='conv1', name_bias='bias1', name_relu='relu1')
#conv1 = maxpool2d(conv1, k=2, name='pool1')
conv2 = conv1d(conv1, poids['wconv2'], biais['bconv2'], name_conv='conv2', name_bias='bias2', name_relu='relu2')
#conv2 = maxpool2d(conv2, k=2, name='pool2')
# Mise en forme des activations de la seconde couche cachee pour l'entree de la couche completement connectee
fcl = tf.reshape(conv2, [-1, poids['wfcl'].get_shape().as_list()[0]], name='reshape')
fcl = tf.add(tf.matmul(fcl, poids['wfcl']), biais['bfcl'], name='fcl')
fcl = tf.nn.relu(fcl, name='relu3')
# Dropout
#fcl = tf.nn.dropout(fcl, dropout, name='dropout')
# Couche de sortie
out = tf.add(tf.matmul(fcl, poids['out']), biais['out'], name='out')
return out
if __name__ == '__main__':
'''
-------------------------------------------------------------------------
Parametres du reseau
-------------------------------------------------------------------------
'''
learning_rate = 0.00001
num_epochs = 10
batch_size = 1
dropout = 0.75 # ici probabilite de garder le neurone
logs_path = "tensorflow_logs"
data, targets = getData()
data = np.float32(data)
targets = np.float32(targets)
x_train = data[:150]
y_train = targets[:150]
x_test = data[150:]
y_test = targets[150:]
num_examples = x_train.shape[0]
num_input =x_train.shape[1]
num_classes = y_train.shape[1]
# Affichage des informations par pas de temps
display_step = 10
# Espaces reserves qui vont etre remplis par les tenseurs representant l'ensemble des images et des labels lors de l'apprentissage
x = tf.placeholder(tf.float32, [None, num_input], name='data')
y = tf.placeholder(tf.float32, [None, num_classes], name='labels')
#keep_prob = tf.placeholder(tf.float32, name='keep_prob')
#Stockage des poids et biais dans des variables TF
'''
TODO : initialiser avec une loi normale des variables tensorFlow :
- wconv1 et bcconv1 pour CONV1. wconv1 est un banc de 32 filtres 5*5*1. bconv1 a une taille adaptee
- wconv2 et bconv2 pour CONV2. wcconv2 est un banc de 32 filtres 5*5*32*64. bconv2 a une taille adaptee
- wfcl et bfcl pour la couche completement connectee : wfcl est un banc de filtres 7*7*64 a 1024 sorties. bfcl a une taille adaptee
- out (poids et biais) ont une taille adaptee pour la classification des donnees MNIST
'''
poids = {
'wconv1': tf.Variable(tf.random_normal([5, 1, 32])),
'wconv2': tf.Variable(tf.random_normal([5, 32, 64])),
'wfcl': tf.Variable(tf.random_normal([64, 1024])),
'out': tf.Variable(tf.random_normal([1024, num_classes]))
}
biais = {
'bconv1': tf.Variable(tf.random_normal([32])),
'bconv2': tf.Variable(tf.random_normal([64])),
'bfcl': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([num_classes]))
}
# Construction du modele
pred = conv_net(x, poids, biais)
print(pred.shape)
# Fonction de perte et procedure d'optimisation
#TODO : entropie croisee avec logits, algorithme d'optimisation ADAM
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluation du modele
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initialisation des variables
init = tf.global_variables_initializer()
tf.summary.scalar("cost", cost)
tf.summary.scalar("accuracy", accuracy)
merged_summary_op = tf.summary.merge_all()
# Creation d'une session TF pour executer le programme
with tf.Session() as sess:
sess.run(init)
train_writer = tf.summary.FileWriter(logs_path + '/train', graph=tf.get_default_graph())
test_writer = tf.summary.FileWriter(logs_path + '/test', graph=tf.get_default_graph())
# Entrainement
total_batch = int(num_examples/batch_size)
for epoch in range(num_epochs):
# Entrainement sur les batchs d'images
for step in range (total_batch):
perm = np.arange(num_examples)
np.random.shuffle(perm)
indices = perm[0:batch_size]
batch_x = x_train[indices]
batch_y = y_train[indices]
_, summary = sess.run([optimizer, merged_summary_op], feed_dict={x: batch_x,
y: batch_y})
train_writer.add_summary(summary, epoch*total_batch+step)
if step % display_step == 0:
loss, acc, summaryt = sess.run([cost, accuracy, merged_summary_op], feed_dict={x: batch_x,
y: batch_y})
test_writer.add_summary(summaryt, epoch*total_batch+step)
print("Iteration " + str(epoch * total_batch + step) + ", Precision = " + "{:.5f}".format(acc))
# Test
print("Test:", sess.run(accuracy, feed_dict={x: x_test,
y: y_test}))
当我保留摘要时,我收到此错误:
InvalidArgumentError(请参阅上面的回溯):您必须为占位符张量数据提供一个值' dtype浮点数和形状[?,482]
所以,我有点迷失,因为当我删除摘要时如下:
for step in range (total_batch):
perm = np.arange(num_examples)
np.random.shuffle(perm)
indices = perm[0:batch_size]
batch_x = x_train[indices]
batch_y = y_train[indices]
sess.run([optimizer], feed_dict={x: batch_x,
y: batch_y})
#train_writer.add_summary(summary, epoch*total_batch+step)
if step % display_step == 0:
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y})
#test_writer.add_summary(summaryt, epoch*total_batch+step)
print("Iteration " + str(epoch * total_batch + step) + ", Precision = " + "{:.5f}".format(acc))
我的模型正在学习(它几乎没有学到任何东西,但它仍然在做什么)
谁能看到我做错了什么? 我必须承认,我一直使用4D阵列,主要用于图像识别,所以这个案例对我来说是新的。
感谢任何好的samaraitan回答我的问题。
P.S。很抱歉法国评论,但我相信您仍然可以理解代码
答案 0 :(得分:0)
好的,我忘记了提到的事情:我和Spyder一起工作。
错误必须来自那里,因为我直接在Ubuntu的控制台中执行了我的程序,一切正常。
关闭它。