如何提供tfrecord文件以训练堆叠式自动编码器

时间:2019-02-17 14:48:57

标签: python tensorflow keras autoencoder tfrecord

我不知道如何从TFRecord文件中获取特征数量,以便将其作为堆叠式自动编码器的输入维数。

我的堆叠式自动编码器使用了以下代码:

from __future__ import print_function
import keras
import numpy
from keras.models import Sequential
from keras.layers.core import *
from sklearn.model_selection import train_test_split
from app_flag import FLAGS

class StackedAutoencoder(object):
    """
    Stacked autoencoder multi-class classifier built with the Keras Python package.

    This classifier is used to classify cells to cell cycle phases S, G1 or G2M.

    Typical usage is to construct the object, call ``create_autoencoder()`` to
    pre-train the autoencoder and build the classifier, then call
    ``evaluate_autoencoder()`` to train and score the classification layer.
    """

    def __init__(self, features, labels, num_labels):
        """
        Store the dataset and seed NumPy's random number generator.

        :param features: 2-D feature matrix; ``features.shape[1]`` is used as the
            network's input dimension.
        :param labels: Training targets passed to the softmax classifier
            (presumably one-hot encoded, since the classifier is compiled with
            categorical cross-entropy -- confirm against the caller).
        :param num_labels: Number of output classes of the softmax layer.
        """
        self.features = features
        self.labels = labels
        self.auto_encoder = None
        self.encoding_dim = num_labels

        # Fix the random seed for reproducibility.
        self.seed = 7
        numpy.random.seed(self.seed)

    def create_autoencoder(self):
        """
        Build the stacked auto-encoder using multiple hidden layers.

        The stacked auto-encoder is trained to reconstruct its input, its
        reconstruction layer is removed, the remaining layers are frozen, and a
        softmax classification layer is appended in its place.

        :return: Compiled classification neural network model.
        """
        # Coerce to a plain int: shape entries may be framework dimension
        # objects rather than Python ints, which Keras cannot do arithmetic on.
        input_dim = int(self.features.shape[1])

        self.auto_encoder = Sequential()
        # Encoder.
        self.auto_encoder.add(Dense(3000, activation='relu', input_dim=input_dim))
        self.auto_encoder.add(Dense(1000, activation='relu'))
        self.auto_encoder.add(Dense(30, activation='relu'))
        # Decoder, ending in a reconstruction of the input.
        self.auto_encoder.add(Dense(3000, activation='relu'))
        self.auto_encoder.add(Dense(input_dim, activation='sigmoid'))

        self.auto_encoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        # Unsupervised pre-training: the input is also the target.
        self.auto_encoder.fit(self.features, self.features,
                              epochs=10,
                              batch_size=5,
                              shuffle=True,
                              validation_split=0.33)

        # Drop the reconstruction layer through the model API so the model's
        # output is rewired; mutating the `layers` list alone does not do that.
        self.auto_encoder.pop()

        # Freeze the pre-trained layers BEFORE compiling the classifier;
        # changes to `trainable` only take effect at compile time.
        for layer in self.auto_encoder.layers:
            layer.trainable = False

        self.auto_encoder.add(Dense(self.encoding_dim, activation='softmax'))
        self.auto_encoder.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        # summary() prints the table itself and returns None, so it is not wrapped in print().
        self.auto_encoder.summary()

        return self.auto_encoder

    def evaluate_autoencoder(self):
        """
        Train the classification layer and score it on a held-out test split.

        The dataset is split into training and testing sets first, so the
        returned score is computed on samples the classifier was not fitted on.

        :return: Result of ``evaluate`` on the test split (loss followed by the
            compiled metrics, i.e. accuracy).
        :raises RuntimeError: If ``create_autoencoder()`` has not been called yet.
        """
        if self.auto_encoder is None:
            raise RuntimeError("create_autoencoder() must be called before evaluate_autoencoder()")

        X_train, X_test, Y_train, Y_test = train_test_split(
            self.features, self.labels, test_size=0.33, random_state=self.seed)

        self.auto_encoder.fit(X_train, Y_train,
                              epochs=10,
                              batch_size=5,
                              shuffle=True)

        score = self.auto_encoder.evaluate(X_test, Y_test, batch_size=5, verbose=1)
        return score

运行代码时,在该行出现错误:

self.auto_encoder.add(Dense(3000, activation='relu', input_dim=self.features.shape[1])) 

错误信息为:

  

TypeError:float()参数必须是字符串或数字。

因此,如何使用TFRecord文件获取输入维数(特征数量)?

0 个答案:

没有答案