我在运行一段简短的程序时遇到了 segfault,内核日志为:segfault at 7f7b00000010 ip 00007f7f08846aa3 sp 00007f7ec0ff7ba0 error 4 in libtensorflow_framework.so[7f7f08404000+cc8000]。程序如下:
import numpy as np
import gc
from keras.models import Model
from keras.layers import Dense, Activation, Dropout, LSTM, Input, \
BatchNormalization, concatenate, Embedding, Reshape
from keras.optimizers import SGD, Adam
from keras.utils import to_categorical
number_bins = 4096
embedding_dim = int((number_bins ** (1 / 4)) * 2)
time_steps = 11
batch_size = 512
opt_sgd = SGD(lr=1e-4, momentum=0.9)
opt_adam = Adam(lr=1e-4)
def generate_state_action_value(full_path, action=True):
while True:
with open(full_path) as f:
value1_batch = []
value2_batch = []
label_batch = []
value_batch = []
count = 0
for line in f:
count += 1
line = line.split(',')
value = np.array(float(line.pop(-1)))
line = list(map(int, line))
label = line.pop(-1)
value1 = line[:int(len(line) / 2)]
value2 = line[int(len(line) / 2):]
label = to_categorical(label, num_classes=3)
value1_batch.append(value1)
value2_batch.append(value2)
label_batch.append(label)
value_batch.append(value)
if count == batch_size:
value1_ = np.reshape(np.array(value1_batch), (batch_size, time_steps, 1))
value2_ = np.reshape(np.array(value2_batch), (batch_size, time_steps, 1))
label_ = np.reshape(label_batch, (batch_size, 3))
value_ = np.reshape(value_batch, (batch_size, 1))
if action:
yield ([value1_, value2_], label_)
else:
yield ([value1_, value2_], value_)
value1_batch = []
value2_batch = []
label_batch = []
value_batch = []
count = 0
def generate_action_value(ful_path):
actions = []
values = []
num_lines = 0
with open(ful_path) as f:
for line in f:
num_lines += 1
line = line.split(',')
values.append(line[-1])
actions.append(line[-2])
f.close()
actions = list(map(np.int, actions))
values = list(map(np.float32, values))
return actions, values, num_lines, (num_lines // batch_size)
def dnn_state_action(store_path, policy=True):
# layer1_dropout = 0.15
layer2_dropout = 0.
layer3_dropout = 0.
values_dropout = [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
for ld in values_dropout:
layer1_dropout = ld
print('========================================')
print('layer2_dropout = ' + str(layer1_dropout))
print('========================================')
############################################
value1_input = Input(shape=(time_steps, 1))
value2_input = Input(shape=(time_steps, 1))
############################################
value1_input1 = Embedding(input_dim=number_bins, output_dim=embedding_dim, input_length=time_steps)(value1_input)
value1_input2 = Reshape((time_steps, embedding_dim))(value1_input1)
BatchNormalization()(value1_input2)
value1_input2 = Dropout(layer1_dropout)(value1_input2)
value1_lstm1 = LSTM(256)(value1_input2)
BatchNormalization()(value1_lstm1)
#####################
value2_input1 = Embedding(input_dim=number_bins, output_dim=embedding_dim, input_length=time_steps)(value2_input)
value2_input2 = Reshape((time_steps, embedding_dim))(value2_input1)
BatchNormalization()(value2_input2)
value2_input2 = Dropout(layer1_dropout)(value2_input2)
value2_lstm1 = LSTM(256)(value2_input2)
BatchNormalization()(value2_lstm1)
#####################
value1_value2_lstm1 = concatenate([value1_lstm1, value2_lstm1])
value1_value2_lstm1 = Dropout(layer2_dropout)(value1_value2_lstm1)
value1_value2_lstm2 = Dense(512, activation=None)(value1_value2_lstm1)
BatchNormalization()(value1_value2_lstm2)
value1_value2_lstm3 = Activation('relu')(value1_value2_lstm2)
value1_value2_lstm4 = Dropout(layer3_dropout)(value1_value2_lstm3)
value1_value2_lstm4 = Dense(128, activation=None)(value1_value2_lstm4)
BatchNormalization()(value1_value2_lstm4)
value1_value2_lstm4 = Activation('relu')(value1_value2_lstm4)
output = Dense(3, activation='softmax', name='classifier')(value1_value2_lstm4)
############################################
model = Model(inputs=[value1_input, value2_input], outputs=output)
model.compile(optimizer=opt_adam, loss={'classifier': 'categorical_crossentropy'},
metrics={'classifier': 'accuracy'})
if policy:
train = 'train-policy.txt'
test = 'test-policy.txt'
actions_train, values_train, num_lines_train, num_batch_train = generate_action_value(store_path + train)
actions_test, values_test, num_lines_test, num_batch_test = generate_action_value(store_path + test)
model.fit_generator(generate_state_action_value(store_path + train, action=True),
epochs=100, verbose=2, steps_per_epoch=num_batch_train, max_queue_size=20,
validation_data=generate_state_action_value(store_path + test, action=True),
validation_steps=num_batch_test, workers=1)
del model
for _ in range(100):
gc.collect()
return
我在 Ubuntu 16.04(全新安装)上使用 tensorflow-gpu(1.7.1)和 keras(2.1.6)。程序能运行几十个(20–50)epoch 后崩溃,有时能撑到 ld 的下一个取值才崩溃。
你知道哪里出了问题吗?提前感谢。