我刚刚完成了关于多标签文本分类(百分比单位)的Keras BERT模型的训练,我希望能够将训练模型应用于新的(未标记)文本。
这是我模型的主要部分:
class BertLayer(tf.keras.layers.Layer):
def __init__(
self,
n_fine_tune_layers=10,
pooling="first",
bert_path="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1", #This is the model we choose
**kwargs,
):
self.n_fine_tune_layers = n_fine_tune_layers
self.trainable = True
self.output_size = 768
self.pooling = pooling
self.bert_path = bert_path
if self.pooling not in ["first", "mean"]:
raise NameError(
f"Undefined pooling type (must be either first or mean, but is {self.pooling}"
)
super(BertLayer, self).__init__(**kwargs)
def get_config(self):
config = super().get_config().copy()
config.update({
'n_fine_tune_layers': self.n_fine_tune_layers,
'trainable': self.trainable,
'output_size': self.output_size,
'pooling': self.pooling,
'bert_path': self.bert_path,
})
return config
def build(self, input_shape):
self.bert = hub.Module(
self.bert_path, trainable=self.trainable, name=f"{self.name}_module"
)
# Remove unused layers
trainable_vars = self.bert.variables
if self.pooling == "first":
trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]
trainable_layers = ["pooler/dense"]
elif self.pooling == "mean":
trainable_vars = [
var
for var in trainable_vars
if not "/cls/" in var.name and not "/pooler/" in var.name
]
trainable_layers = []
else:
raise NameError(
f"Undefined pooling type (must be either first or mean, but is {self.pooling}"
)
# Select how many layers to fine tune
for i in range(self.n_fine_tune_layers):
trainable_layers.append(f"encoder/layer_{str(11 - i)}")
# Update trainable vars to contain only the specified layers
trainable_vars = [
var
for var in trainable_vars
if any([l in var.name for l in trainable_layers])
]
# Add to trainable weights
for var in trainable_vars:
self._trainable_weights.append(var)
for var in self.bert.variables:
if var not in self._trainable_weights:
self._non_trainable_weights.append(var)
super(BertLayer, self).build(input_shape)
def call(self, inputs):
inputs = [K.cast(x, dtype="int32") for x in inputs]
input_ids, input_mask, segment_ids = inputs
bert_inputs = dict(
input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
)
if self.pooling == "first":
pooled = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
"pooled_output"
]
elif self.pooling == "mean":
result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
"sequence_output"
]
mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)
input_mask = tf.cast(input_mask, tf.float32)
pooled = masked_reduce_mean(result, input_mask)
else:
raise NameError(f"Undefined pooling type (must be either first or mean, but is {self.pooling}")
return pooled
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_size)
# Build model
def build_model(max_seq_length):
in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")
#This is the input in list form to be fed to the model
bert_inputs = [in_id, in_mask, in_segment]
bert_output = BertLayer(n_fine_tune_layers=3, pooling="first")(bert_inputs) #calling the preloaded BERT model we have installed
dense = tf.keras.layers.Dense(256, activation='relu')(bert_output) # Attaching our model output here
pred = tf.keras.layers.Dense(len(possible_labels), activation='sigmoid')(dense)
model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
return model
def initialize_vars(sess):
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())
K.set_session(sess)
modll_ = build_model(max_seq_length)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_ids (InputLayer) [(None, 200)] 0
__________________________________________________________________________________________________
input_masks (InputLayer) [(None, 200)] 0
__________________________________________________________________________________________________
segment_ids (InputLayer) [(None, 200)] 0
__________________________________________________________________________________________________
bert_layer (BertLayer) (None, 768) 110104890 input_ids[0][0]
input_masks[0][0]
segment_ids[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 256) 196864 bert_layer[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (None, 17) 4369 dense[0][0]
==================================================================================================
Total params: 110,306,123
Trainable params: 22,055,441
Non-trainable params: 88,250,682
__________________________________________________________________________________________________
# Instantiate variables
initialize_vars(sess)
train_input_ids.shape, train_input_masks.shape, train_segment_ids.shape, train_labels.shape
#Defining NBatchLogger for logging details for training
class NBatchLogger(tf.keras.callbacks.Callback):
def __init__(self, display):
self.seen = 0
self.display = display
def on_batch_end(self, batch, logs={}):
self.seen += logs.get('size', 0)
if self.seen % self.display == 0:
metrics_log = ''
for k in self.params['metrics']:
if k in logs:
val = logs[k]
if abs(val) > 1e-3:
metrics_log += ' - %s: %.4f' % (k, val)
else:
metrics_log += ' - %s: %.4e' % (k, val)
print('{}/{} ... {}'.format(self.seen,
self.params['samples'],
metrics_log))
out_batch = NBatchLogger(display=1000)
modll_.fit([train_input_ids, train_input_masks, train_segment_ids], train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=VALIDATION_SPLIT, callbacks=[out_batch])
在这段代码之后,我尝试了:
modll_.save('model.h5')
#from tensorflow.keras.models import load_model
model = load_model('model.h5', custom_objects={'BertLayer': BertLayer})
但是我有:
TypeError: ('Keyword argument not understood:', 'output_size')
在研究此错误期间,我读到仅保存模型权重很简单,所以我尝试了:
modll_.save_weights('bert_weights.h5')
modll_.load_weights("bert_weights.h5")
一旦在模型中保留了modll_ weights,我便将模型应用于新文本以获得其谓词:
p_preds = modll_.predict([p_input_ids, p_input_masks, p_segment_ids])
pred_dict = {}
list_p = []
for i in p_preds:
pred_dict['S1'] = str((i[0]*100)) + ' %'
pred_dict['S2'] = str(i[1]*100) + ' %'
pred_dict['S3'] = str(i[2]*100) + ' %'
list_p.append(pred_dict)
for i in range(len(list_p)):
print(list_p[i], "\n")
但是我的分类很糟糕(我尝试了很多文本),这令人惊讶,因为在训练阶段我的准确度为0.94。我认为保存和加载模型时犯了错误。有什么想法吗?
答案 0 :(得分:0)
在加载配置并重建图层/模型时,将从config
返回的get_config
字典应用于构造函数。您的情况类似于:BertLayer(**config)
。
由于构造函数中没有output_size
,因此您应将参数output_size
添加到__init__
(建议这样做,因为您不应该对常数进行硬编码) ,或将其从get_config
中删除。