Question

我正在构建一个简单的TensorFlow模型，在其中使用Tf-Hub模块微调Elmo模型。

我得到了很好的结果，但是问题是当我保存模型时，同时在Keras和Kb中构建相同模型的地方很大。

我的输入数据是：

import numpy as np
data_points = np.array([['that what is good for the goose'],['some of which'],['demonstrating the adage'],['A series of escapades demonstrating']])
data_labels = np.array([[1,0,0,0,0],[1,0,0,1,1],[1,0,0,0,0],[1,0,0,1,1]])
tf_input    = np.array([sent for m in data_points for sent in m])

这是Keras模型：

#dl framework
import tensorflow as tf
import tensorflow_hub as hub
from keras import backend as K
import keras.layers as layers
from keras.engine import Layer
from keras.models import Model, load_model
from keras.callbacks import EarlyStopping,ModelCheckpoint
import pandas as pd
from keras.engine import Layer


class ElmoEmbeddingLayer(Layer):
    def __init__(self, **kwargs):
        self.dimensions = 1024
        self.trainable=True
        super(ElmoEmbeddingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable,
                               name="{}_module".format(self.name))

        self.trainable_weights += K.tf.trainable_variables(scope="^{}_module/.*".format(self.name))
        super(ElmoEmbeddingLayer, self).build(input_shape)

    def call(self, x, mask=None):
        result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
                      as_dict=True,
                      signature='default',
                      )['default']
        return result

    def compute_mask(self, inputs, mask=None):
        return K.not_equal(inputs, '--PAD--')

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.dimensions)


def build_model(): 
    input_text = layers.Input(shape=(1,), dtype="string")
    embedding = ElmoEmbeddingLayer()(input_text)
    pred = layers.Dense(5, activation='softmax')(embedding)
    model = Model(inputs=[input_text], outputs=pred)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model


model = build_model()
checkpointer   = ModelCheckpoint('model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')
model.fit(data_points,data_labels, validation_data=(data_points,data_labels),epochs=2,
batch_size= 5 ,callbacks=[checkpointer])

训练后，模型大小为87 kb

我在Tensorflow中构建了相同的模型：

import tensorflow as tf
import numpy as np
import os
import tensorflow_hub as hub



class Elmo_model(object):

    def __init__(self):

        tf.reset_default_graph()

        # placeholders
        sentences             = tf.placeholder(tf.string, (None,), name='sentences')
        self.targets          = tf.placeholder(tf.int32, [None, None], name='labels' )
        self.placeholders     = {'sentence': sentences, 'labels': self.targets}



        # elmo model
        module                = hub.Module('https://tfhub.dev/google/elmo/2', trainable = True)
        embeddings            = module(dict(text=sentences))

        dense_layer           = tf.layers.dense(embeddings, 5)


        #optimization and loss calculation ---------------------------------->>

        self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits = dense_layer, labels = tf.cast(self.targets,tf.float32))
        self.loss = tf.reduce_mean(tf.reduce_sum(self.cross_entropy, axis=1))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(self.loss)
        self.predictions = tf.cast(tf.sigmoid(dense_layer) > 0.5, tf.int32)

培训：

def model_execute(model):

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.tables_initializer()])

        for iteration in range(5):
            model_out,train = sess.run([model.loss,model.optimizer],feed_dict={model.placeholders['sentence']: tf_input,
                                                                           model.placeholders['labels']: data_labels})
        print(model_out)
        saver.save(sess, 'tf_model.hdf5')



model_out = Elmo_model()
model_execute(model_out)

在训练了相同的纪元和数据集之后，TensorFlow模型的大小为374.5 MB

我的问题是Keras如何优化模型以及如何在Tensorflow中保存像Keras这样的模型？我经历了issue on github和StackOverflow question这两个步骤，但找不到帮助。

Tensorflow：如何优化训练后的模型大小？

0 个答案: