我正在尝试训练Tensorflow卷积神经网络,无论我在什么环境下运行程序,我总是会遇到一个隐秘错误。
在Jupyter Notebook中,内核只是死掉了。
在终端中,我得到“非法指令:4”,没有回溯。
在Pycharm中,我得到:“进程以退出代码132(被信号4:SIGILL中断)完成了。”
我在网上四处搜索,但没有找到任何在这种情况下引发此特定错误的实例。如果有人可以帮助我弄清这个错误,我将不胜感激。
我正在将Mac OS X High Sierra与python 3.6.2一起使用
我的代码可以在下面找到,正如我之前所说的,没有回溯。
import tensorflow as tf
import numpy as np
import pandas as pd
# OS to load files and save checkpoints
import os
# Input geometry: training arrays are reshaped to 60x1, evaluation arrays to 15x1.
image_height = 60
image_width = 1
image1_height = 15
image2_width = 1
model_name = "tensorflowCNN"

# --- Training inputs: CSV column 1, first 60 rows ---------------------------
# Each cell is a string holding a bracketed, whitespace-separated list of
# numbers; every row is collapsed to the mean of its values.
train_data = np.asarray(pd.read_csv("/home/student/Desktop/TrainingInput.csv", usecols=[1]))
raw_rows = train_data.tolist()[0:60]
stripped_rows = [row[0].strip('[]\n,') for row in raw_rows]
token_rows = [text.split() for text in stripped_rows]
# The "...," token is skipped rather than parsed as a number.
value_rows = [[float(tok) for tok in tokens if tok != "...,"] for tokens in token_rows]
train_data = np.asarray([np.mean(values) for values in value_rows])

# --- Training labels: CSV column 2, first 60 rows ---------------------------
# Label names are mapped to float codes; any unrecognised name becomes 3.0.
train_labels = np.asarray(pd.read_csv("/home/student/Desktop/TrainingInput.csv", usecols=[2]))
label_codes = {"GravelTraining": 1.0, "WaterTraining": 2.0}
label_names = [row[0] for row in train_labels.tolist()[0:60]]
encoded_labels = [label_codes.get(name, 3.0) for name in label_names]
print(encoded_labels)
train_labels = np.asarray(encoded_labels)

# --- Evaluation inputs and labels: first 15 rows, kept as read --------------
eval_data = np.asarray(pd.read_csv("/home/student/Desktop/TestingInput.csv", usecols=[1]))
eval_data = np.asarray(eval_data.tolist()[0:15])
eval_labels = np.asarray(pd.read_csv("/home/student/Desktop/TestingInput.csv", usecols=[2]))
eval_labels = np.asarray(eval_labels.tolist()[0:15])

category_names = [str(index) for index in range(3)]

# Reshape every array to 4-D with shape (-1, height, width, 1).
# NOTE(review): the labels are reshaped to the same 4-D layout as the inputs;
# confirm this is intended.
train_data = train_data.reshape((-1, image_height, image_width, 1))
train_labels = train_labels.reshape((-1, image_height, image_width, 1))
eval_labels = eval_labels.reshape((-1, image1_height, image2_width, 1))
eval_data = eval_data.reshape((-1, image1_height, image2_width, 1))
# TODO: The neural network
class ConvNet:
    """Convolutional classifier graph built with the TF1 ``tf.layers`` API.

    Constructing an instance adds two placeholders, three conv/max-pool
    stages, a dense head with dropout, a softmax cross-entropy loss and a
    gradient-descent training op to the current default graph.

    Attributes:
        input_layer: float32 placeholder for the inputs, shaped
            ``[batch, image_height, Image_width, chan]``.
        labels: float32 placeholder for class indices (cast to int32 before
            one-hot encoding).
        choice: argmax of the logits along axis 1.
        probabilities: softmax of the logits.
        accuracy: accuracy value tensor returned by ``tf.metrics.accuracy``.
        accuracy_op: the matching update op returned by ``tf.metrics.accuracy``.
        loss: softmax cross-entropy between one-hot labels and logits.
        train_operation: op that minimises ``loss`` with gradient descent.
    """

    def __init__(self, image_height, Image_width, num_classes, chan):
        """Build the network graph.

        Args:
            image_height: height of each input sample.
            Image_width: width of each input sample.
            num_classes: number of output logits (also the one-hot depth).
            chan: number of input channels.
        """
        # The batch dimension is left open (None) so any batch size can be
        # fed; a batch of exactly one sample is still accepted.
        self.input_layer = tf.placeholder(
            dtype=tf.float32,
            shape=[None, image_height, Image_width, chan],
            name="inputs",
        )

        # Stage 1: 32 filters, pooled along the height axis with stride 1.
        conv_layer_1 = tf.layers.conv2d(self.input_layer, filters=32, kernel_size=[5, 5], padding="same",
                                        activation=tf.nn.relu)
        pooling_layer_1 = tf.layers.max_pooling2d(conv_layer_1, pool_size=[2, 1], strides=1)

        # Stage 2: 64 filters, pooled with stride 2.
        conv_layer_2 = tf.layers.conv2d(pooling_layer_1, filters=64, kernel_size=[5, 5], padding="same",
                                        activation=tf.nn.relu)
        pooling_layer_2 = tf.layers.max_pooling2d(conv_layer_2, pool_size=[2, 1], strides=2)

        # Stage 3: 128 filters, pooled with stride 2.
        conv_layer_3 = tf.layers.conv2d(pooling_layer_2, filters=128, kernel_size=[5, 5], padding="same",
                                        activation=tf.nn.relu)
        pooling_layer_3 = tf.layers.max_pooling2d(conv_layer_3, pool_size=[2, 1], strides=2)

        # Flatten the output of the LAST stage so that stages 2 and 3
        # actually contribute to the prediction instead of being dead graph
        # nodes.
        flattened_pooling = tf.layers.flatten(pooling_layer_3)
        dense_layer = tf.layers.dense(flattened_pooling, 60, activation=tf.nn.relu)
        # NOTE(review): dropout is hard-wired to training mode, so it is also
        # active whenever this graph is used for evaluation.
        dropout = tf.layers.dropout(dense_layer, rate=0.4, training=True)
        output_dense_layer = tf.layers.dense(dropout, num_classes)

        # Predictions.
        self.choice = tf.argmax(output_dense_layer, axis=1)
        self.probabilities = tf.nn.softmax(output_dense_layer)

        # Labels and metrics.
        self.labels = tf.placeholder(dtype=tf.float32, name="labels")
        self.accuracy, self.accuracy_op = tf.metrics.accuracy(self.labels, self.choice)

        # Loss and optimiser.
        one_hot_labels = tf.one_hot(indices=tf.cast(self.labels, dtype=tf.int32), depth=num_classes)
        self.loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=output_dense_layer)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2)
        self.train_operation = optimizer.minimize(loss=self.loss, global_step=tf.train.get_global_step())
# Training process: configuration
training_steps = 20000
batch_size = 60
path = "./" + model_name + "-cnn/"
load_checkpoint = False

tf.reset_default_graph()

# Input pipeline: slice -> shuffle over the whole set -> batch -> repeat forever.
dataset = (
    tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    .shuffle(buffer_size=train_labels.shape[0])
    .batch(batch_size)
    .repeat()
)
dataset_iterator = dataset.make_initializable_iterator()
next_element = dataset_iterator.get_next()

# NOTE(review): num_classes is passed as 1 here, while the labels are encoded
# as 1.0/2.0/3.0 and category_names has three entries; confirm the intended value.
cnn = ConvNet(image_height, image_width, 1, 1)
print("milestone1")
saver = tf.train.Saver(max_to_keep=2)
print('milestone2')
if not os.path.exists(path):
    os.makedirs(path)
print('milestone3')

with tf.Session() as sess:
    # Checkpoint restore is currently disabled; variables are always
    # initialised from scratch.
    # if load_checkpoint:
    #     print(path)
    #     checkpoint = tf.train.get_checkpoint_state(path)
    #     print(checkpoint)
    #     saver.restore(sess, checkpoint.model_checkpoint_path)
    # else:
    sess.run(tf.global_variables_initializer())
    print('milestone4')
    sess.run(tf.local_variables_initializer())
    sess.run(dataset_iterator.initializer)

    for step in range(training_steps):
        batch_inputs, batch_labels = sess.run(next_element)
        feed = {cnn.input_layer: batch_inputs, cnn.labels: batch_labels}
        sess.run((cnn.train_operation, cnn.accuracy_op), feed_dict=feed)

        # Report accuracy and write a checkpoint on every step after the first.
        if step > 0:
            current_acc = sess.run(cnn.accuracy)
            print("Accuracy at step " + str(step) + ":" + str(current_acc))
            saver.save(sess, path + model_name, step)

    print("Saving final checkpoint for training session.")
    saver.save(sess, path + model_name, step)
谢谢。
答案 0 :(得分:3)
好的。如果您用的是2.66 GHz的版本,在我看来它是2010年发布的Arrandale架构;在这种情况下,它绝对无法运行,因为该CPU不支持最新的TensorFlow二进制文件所需的AVX指令。
除非您的CPU是Sandy Bridge或更新的架构(即支持AVX指令),否则
您的选择是:
1)获得更新的CPU
2)安装旧版本的TensorFlow
3)从源码编译TensorFlow
有关降级版本,请参见:
Illegal instruction(core dumped) tensorflow
Illegal instruction when import tensorflow in Python
编辑
看起来,尽管所有Sandy Bridge及更新的Core(i3、i5、i7)和Xeon CPU都支持AVX,但截至2018年,Celeron或Pentium CPU并非如此。如果要购买硬件,请先检查计算机所配备的CPU。