libtensorflow_framework.so

时间:2018-06-14 22:38:31

标签: tensorflow keras

我在运行一个简短的程序时遇到了 segfault，内核日志为：segfault at 7f7b00000010 ip 00007f7f08846aa3 sp 00007f7ec0ff7ba0 error 4 in libtensorflow_framework.so[7f7f08404000+cc8000]：

import numpy as np
import gc

from keras.models import Model
from keras.layers import Dense, Activation, Dropout, LSTM, Input, \
    BatchNormalization, concatenate, Embedding, Reshape
from keras.optimizers import SGD, Adam
from keras.utils import to_categorical


# Hyper-parameters shared by the data generators and the model builder.
number_bins = 4096  # vocabulary size for the Embedding layers
embedding_dim = int((number_bins ** (1 / 4)) * 2)   # heuristic: 2 * fourth-root of number_bins
time_steps = 11  # sequence length fed to each LSTM branch
batch_size = 512  # samples per generator batch

# Optimizers built once at import time; only opt_adam is used below.
opt_sgd = SGD(lr=1e-4, momentum=0.9)  
opt_adam = Adam(lr=1e-4)


def generate_state_action_value(full_path, action=True):
    """Endlessly yield fixed-size training batches parsed from a text file.

    Each comma-separated line is expected to hold 2*time_steps integer bin
    indices, then an integer class label, then a float target value
    (assumed layout -- TODO confirm against the data files).

    Args:
        full_path: path of the text file to read, one sample per line.
        action: if True, yield ([value1, value2], one-hot label);
            otherwise yield ([value1, value2], scalar value).

    Yields:
        Tuples shaped for model.fit_generator. Lines left over after the
        last full batch are dropped, and the file is re-read from the
        start forever (the generator never terminates).
    """
    while True:
        with open(full_path) as src:
            v1_buf, v2_buf = [], []
            labels_buf, targets_buf = [], []
            n_seen = 0
            for raw in src:
                n_seen += 1
                fields = raw.split(',')
                # Last field is the float regression target.
                target = np.array(float(fields.pop(-1)))
                fields = list(map(int, fields))
                # Second-to-last field is the class label (0..2).
                cls = fields.pop(-1)
                half = int(len(fields) / 2)
                v1_buf.append(fields[:half])
                v2_buf.append(fields[half:])
                labels_buf.append(to_categorical(cls, num_classes=3))
                targets_buf.append(target)
                if n_seen == batch_size:
                    v1 = np.reshape(np.array(v1_buf), (batch_size, time_steps, 1))
                    v2 = np.reshape(np.array(v2_buf), (batch_size, time_steps, 1))
                    if action:
                        yield ([v1, v2], np.reshape(labels_buf, (batch_size, 3)))
                    else:
                        yield ([v1, v2], np.reshape(targets_buf, (batch_size, 1)))
                    v1_buf, v2_buf = [], []
                    labels_buf, targets_buf = [], []
                    n_seen = 0


def generate_action_value(ful_path):
    """Read every line of *ful_path* and collect its action/value columns.

    Each comma-separated line is expected to end with ``..., action, value``:
    the action (second-to-last field) is parsed as int and the value
    (last field) as float32.

    Args:
        ful_path: path to the text file, one sample per line.

    Returns:
        Tuple ``(actions, values, num_lines, num_batches)`` where
        ``num_batches`` counts only *full* batches (num_lines // batch_size).
    """
    actions = []
    values = []
    num_lines = 0
    with open(ful_path) as f:
        for line in f:
            num_lines += 1
            fields = line.split(',')
            values.append(fields[-1])
            actions.append(fields[-2])
        # The redundant f.close() was removed: the `with` block already
        # closes the file.
    # np.int was only an alias for the builtin int and was removed in
    # NumPy 1.24; use the builtin directly.
    actions = list(map(int, actions))
    values = list(map(np.float32, values))
    return actions, values, num_lines, (num_lines // batch_size)


def dnn_state_action(store_path, policy=True):
    """Train a two-branch embedding+LSTM classifier over a dropout sweep.

    For each candidate layer-1 dropout rate a fresh model is built and
    trained with fit_generator on the files under *store_path*, then
    deleted and garbage-collected before the next sweep value.

    Args:
        store_path: directory prefix for the train/test text files.
        policy: must be True; selects the policy train/test file pair.

    Raises:
        ValueError: if policy is False (no data files are defined for it;
            the original code fell through to a NameError on `train`).
    """
    # layer1_dropout = 0.15
    layer2_dropout = 0.
    layer3_dropout = 0.

    # Hoisted out of the loop: the file choice does not depend on ld.
    if policy:
        train = 'train-policy.txt'
        test = 'test-policy.txt'
    else:
        raise ValueError('policy=False is not supported: no data files defined')

    values_dropout = [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]

    for ld in values_dropout:
        layer1_dropout = ld
        print('========================================')
        # BUG FIX: the label said "layer2_dropout" but the value printed
        # is layer1_dropout.
        print('layer1_dropout = ' + str(layer1_dropout))
        print('========================================')

        ############################################
        value1_input = Input(shape=(time_steps, 1))
        value2_input = Input(shape=(time_steps, 1))
        ############################################
        # Branch 1: embed the integer bins, normalize, regularize, encode.
        # BUG FIX: the BatchNormalization outputs were discarded (the layers
        # were never wired into the graph); keep the returned tensors.
        value1_input1 = Embedding(input_dim=number_bins, output_dim=embedding_dim,
                                  input_length=time_steps)(value1_input)
        value1_input2 = Reshape((time_steps, embedding_dim))(value1_input1)
        value1_input2 = BatchNormalization()(value1_input2)
        value1_input2 = Dropout(layer1_dropout)(value1_input2)
        value1_lstm1 = LSTM(256)(value1_input2)
        value1_lstm1 = BatchNormalization()(value1_lstm1)
        #####################
        # Branch 2: identical structure with its own weights.
        value2_input1 = Embedding(input_dim=number_bins, output_dim=embedding_dim,
                                  input_length=time_steps)(value2_input)
        value2_input2 = Reshape((time_steps, embedding_dim))(value2_input1)
        value2_input2 = BatchNormalization()(value2_input2)
        value2_input2 = Dropout(layer1_dropout)(value2_input2)
        value2_lstm1 = LSTM(256)(value2_input2)
        value2_lstm1 = BatchNormalization()(value2_lstm1)
        #####################
        # Merge the two encodings and classify into 3 actions.
        merged = concatenate([value1_lstm1, value2_lstm1])
        merged = Dropout(layer2_dropout)(merged)
        hidden = Dense(512, activation=None)(merged)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        hidden = Dropout(layer3_dropout)(hidden)
        hidden = Dense(128, activation=None)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        output = Dense(3, activation='softmax', name='classifier')(hidden)
        ############################################
        model = Model(inputs=[value1_input, value2_input], outputs=output)
        model.compile(optimizer=opt_adam,
                      loss={'classifier': 'categorical_crossentropy'},
                      metrics={'classifier': 'accuracy'})

        # Only the full-batch counts are used below; the actions/values
        # lists returned by generate_action_value are not needed here.
        _, _, _, num_batch_train = generate_action_value(store_path + train)
        _, _, _, num_batch_test = generate_action_value(store_path + test)

        model.fit_generator(generate_state_action_value(store_path + train, action=True),
                            epochs=100, verbose=2, steps_per_epoch=num_batch_train,
                            max_queue_size=20,
                            validation_data=generate_state_action_value(store_path + test, action=True),
                            validation_steps=num_batch_test, workers=1)
        # Free the model between sweep iterations to limit memory growth.
        del model
        for _ in range(100):
            gc.collect()
    return

我在 Ubuntu 16.04（全新安装）上使用 tensorflow-gpu (1.7.1) 和 keras (2.1.6)。该程序运行几十个（20-50）epoch 后崩溃，有时能撑到 ld 的下一个取值才崩溃。

你有什么想法,哪里出错了?提前谢谢。

0 个答案:

没有答案