transformer.py:
import tensorflow as tf
import numpy as np
from modules.encoder import Encoder
from modules.decoder import Decoder
class Transformer(tf.keras.Model):
    def __init__(self, num_layers=4, d_model=512, num_heads=8, dff=2048, pe_max_len=8000,
                 target_vocab_size=8000, rate=0.1, config=None, logger=None):
        super(Transformer, self).__init__()
        if config is not None:
            num_enc_layers = config.model.N_encoder
            if logger is not None:
                logger.info('config.model.N_encoder: ' + str(num_enc_layers))
            num_dec_layers = config.model.N_decoder
            if logger is not None:
                logger.info('config.model.N_decoder: ' + str(num_dec_layers))
            d_model = config.model.d_model
            if logger is not None:
                logger.info('config.model.d_model: ' + str(d_model))
            num_heads = config.model.n_heads
            if logger is not None:
                logger.info('config.model.n_heads: ' + str(num_heads))
            dff = config.model.d_ff
            if logger is not None:
                logger.info('config.model.d_ff: ' + str(dff))
            pe_max_len = config.model.pe_max_len
            if logger is not None:
                logger.info('config.model.pe_max_len: ' + str(pe_max_len))
            target_vocab_size = config.model.vocab_size
            if logger is not None:
                logger.info('config.model.vocab_size: ' + str(target_vocab_size))
            rate = config.model.dropout
            if logger is not None:
                logger.info('config.model.dropout: ' + str(rate))
        else:
            print('use default params')
            num_enc_layers = num_layers
            num_dec_layers = num_layers
        self.encoder = Encoder(num_enc_layers, d_model, num_heads, dff,
                               pe_max_len, 'encoder', rate)
        self.decoder = Decoder(num_dec_layers, d_model, num_heads, dff,
                               target_vocab_size, 'decoder', pe_max_len, rate)
        # self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inputs, training, enc_padding_mask,
             look_ahead_mask, dec_padding_mask):
        inp = tf.cast(inputs[0], tf.float32)
        tar = tf.cast(inputs[1], tf.int32)
        enc_output = self.encoder((inp, enc_padding_mask), training)  # (batch_size, inp_seq_len, d_model)
        # dec_output.shape == (batch_size, tar_seq_len, d_model)
        dec_output, attention_weights = self.decoder(
            (tar, enc_output, look_ahead_mask, dec_padding_mask), training)
        # final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
        final_output = dec_output
        return final_output, attention_weights
if __name__ == '__main__':
    sample_transformer = Transformer(
        num_layers=2, d_model=512, num_heads=4, dff=2048,
        pe_max_len=8500, target_vocab_size=32)
    temp_input = tf.random.uniform((64, 62))
    temp_target = tf.random.uniform((64, 26))
    # temp_input = tf.keras.layers.Input((64, 62), dtype=tf.float32)
    # temp_target = tf.keras.layers.Input((16,), dtype=tf.float32)
    # If the inputs are wrapped as Keras Input layers, do the masks also need Input layers?
    fn_out, _ = sample_transformer(inputs=(temp_input, temp_target), training=False,
                                   enc_padding_mask=None,
                                   look_ahead_mask=None,
                                   dec_padding_mask=None)
    sample_transformer.summary()  # calling summary() before call() raises a "model not built" error
    '''
    Model: "transformer"
    _________________________________________________________________
    Layer (type)                 Output Shape              Param #
    =================================================================
    encoder (Encoder)            multiple                  10656768
    _________________________________________________________________
    decoder (Decoder)            multiple                  12504064
    _________________________________________________________________
    dense_32 (Dense)             multiple                  4104000
    =================================================================
    Total params: 27,264,832
    Trainable params: 27,264,832
    Non-trainable params: 0
    _________________________________________________________________
    '''
    # tf.keras.utils.plot_model(sample_transformer)
    print(sample_transformer.get_layer('encoder'))
    tp = sample_transformer.trainable_variables
    for i in range(20):
        print(tp[i].name)
    '''
    <modules.encoder.Encoder object at 0x00000151AD449390>
    transformer/encoder/enc_embedding/embeddings:0
    transformer/encoder/encoder_layer/multi_head_attention/dense/kernel:0
    transformer/encoder/encoder_layer/multi_head_attention/dense/bias:0
    transformer/encoder/encoder_layer/multi_head_attention/dense_1/kernel:0
    transformer/encoder/encoder_layer/multi_head_attention/dense_1/bias:0
    transformer/encoder/encoder_layer/multi_head_attention/dense_2/kernel:0
    transformer/encoder/encoder_layer/multi_head_attention/dense_2/bias:0
    transformer/encoder/encoder_layer/multi_head_attention/dense_3/kernel:0
    transformer/encoder/encoder_layer/multi_head_attention/dense_3/bias:0
    transformer/encoder/encoder_layer/sequential/dense_4/kernel:0
    transformer/encoder/encoder_layer/sequential/dense_4/bias:0
    transformer/encoder/encoder_layer/sequential/dense_5/kernel:0
    transformer/encoder/encoder_layer/sequential/dense_5/bias:0
    transformer/encoder/encoder_layer/layer_normalization/gamma:0
    transformer/encoder/encoder_layer/layer_normalization/beta:0
    transformer/encoder/encoder_layer/layer_normalization_1/gamma:0
    transformer/encoder/encoder_layer/layer_normalization_1/beta:0
    transformer/encoder/encoder_layer_1/multi_head_attention_1/dense_6/kernel:0
    transformer/encoder/encoder_layer_1/multi_head_attention_1/dense_6/bias:0
    transformer/encoder/encoder_layer_1/multi_head_attention_1/dense_7/kernel:0
    '''
    # model = tf.keras.models.Model(inputs=[temp_input, temp_target], outputs=[fn_out])
    # model.summary()
    print(fn_out.shape)  # (batch_size, tar_seq_len, target_vocab_size)
    # summary_writer = tf.keras.callbacks.TensorBoard(log_dir='modules')
    # summary_writer.set_model(model)
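Side note: in the demo above I pass None for all masks. For reference, this is roughly how the padding / look-ahead masks are built (a simplified sketch in the style of the TF transformer tutorial; the actual helpers in modules.input_mask, e.g. create_combined_mask, may differ):
import tensorflow as tf

def create_padding_mask(seq):
    # 1.0 wherever the sequence is padded with 0; shape (batch, 1, 1, seq_len)
    mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    # upper-triangular matrix that hides future positions; shape (size, size)
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

def combined_mask_sketch(tar):
    # combine the look-ahead mask and the target padding mask for decoder self-attention
    look_ahead = create_look_ahead_mask(tf.shape(tar)[1])
    padding = create_padding_mask(tar)
    return tf.maximum(padding, look_ahead)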
Speech_Transformer definition:
from transformer import Transformer
import tensorflow as tf
from modules.attention import Pre_Net
from modules.input_mask import create_combined_mask
import numpy as np
from utils import AttrDict
import yaml

class Speech_transformer(tf.keras.Model):
    def __init__(self, config, logger=None):
        super(Speech_transformer, self).__init__()
        self.pre_net = Pre_Net(config.model.num_M, config.model.n, config.model.c)
        self.transformer = Transformer(config=config, logger=logger)

    def call(self, inputs, targets, training, enc_padding_mask, look_ahead_mask, dec_padding_mask):
        out = self.pre_net(inputs, training)
        final_out, attention_weights = self.transformer((out, targets), training, enc_padding_mask,
                                                        look_ahead_mask, dec_padding_mask)
        return final_out, attention_weights
if __name__ == '__main__':
    configfile = open(r'C:\Users\LungChi\Desktop\Speech-Transformer-tf2.0-master\config\hparams.yaml', encoding='utf-8')
    config = AttrDict(yaml.load(configfile, Loader=yaml.FullLoader))
    print(config.data_name)
    inputs = np.random.randn(32, 233, 80, 3)
    targets = np.random.randint(0, 31, [32, 55])
    combined_mask = create_combined_mask(targets)
    st = Speech_transformer(config, None)
    final_out, attention_weights = st(inputs, targets, True, None, combined_mask, None)
    print('final_out.shape:', final_out.shape)
    print('final_out:', final_out)
In the training step, I save the model with the following code:
ckpt = tf.train.Checkpoint(transformer=model, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)
ckpt_save_path = ckpt_manager.save()
With this, I can successfully train the model and save it.
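A simplified sketch of how the CheckpointManager is driven inside the training loop (train_step, dataset, and num_epochs are placeholders here, not my actual code):
ckpt = tf.train.Checkpoint(transformer=model, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)

# optionally resume from the latest checkpoint before training
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)

for epoch in range(num_epochs):
    for batch in dataset:
        train_step(batch)                 # placeholder for the real training step
    ckpt_save_path = ckpt_manager.save()  # writes ckpt-1, ckpt-2, ..., keeping at most 3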
In the test step, when I restore the model with the following code:
model_to_be_restored = Speech_transformer(config=config)
ckpt = tf.train.Checkpoint(transformer=model_to_be_restored)
ckpt.restore(tf.train.latest_checkpoint(r'logdir/logging/P_S_Transformer_debug'))
the test process finishes with exit code 0 and I get what I want. However, the following message appears during the run:
Exception ignored in: <...>
Traceback (most recent call last):
  File "C:\Users\LungChi\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\training\tracking\util.py", line 140, in __del__
TypeError: 'NoneType' object is not callable
Process finished with exit code 0
The relevant part of tensorflow_core/python/training/tracking/util.py:
class _CheckpointRestoreCoordinatorDeleter(object):
    """Deleter to avoid overriding _CheckpointRestoreCoordinator.__del__()."""

    def __init__(self, expect_partial, object_graph_proto, matched_proto_ids,
                 unused_attributes):
        self.expect_partial = expect_partial
        self.object_graph_proto = object_graph_proto
        self.matched_proto_ids = matched_proto_ids
        self.unused_attributes = unused_attributes

    def set_expect_partial(self, expect_partial):
        self.expect_partial = expect_partial

    def __del__(self):
        if self.expect_partial:
            return
        if logging is None:
            # The logging module may have been unloaded when __del__ is called.
            log_fn = print
        else:
            log_fn = logging.warning
        printed_warning = False
        pretty_printer = _ObjectGraphProtoPrettyPrinter(self.object_graph_proto)  # the traceback points to this line
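I noticed that __del__ above returns early when expect_partial is set, and the status object returned by ckpt.restore() exposes an expect_partial() method that sets this flag, roughly like this (I am not sure whether this is the proper way to deal with the message):
# sketch: the load status returned by restore() can be told to expect a partial restore
status = ckpt.restore(tf.train.latest_checkpoint(r'logdir/logging/P_S_Transformer_debug'))
status.expect_partial()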
How can I solve this? Please help me, thank you very much!