I am building a model based on keras-bert. The model runs fine on a GPU, but when I switch to TPU acceleration in Google Colab, I get the following error:
InvalidArgumentError: indices[0,0] = 101 is not in [0, 2) [[{{node model_1/Embedding-Segment/embedding_lookup}}]]
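For reference, the Embedding-Segment layer in BERT has only two rows (segment IDs 0 and 1), and 101 is the [CLS] token ID in the uncased vocabulary, so the failing lookup seems to be receiving token indices where segment IDs are expected. Below is a minimal sketch of the same failure, independent of keras-bert (my own repro, assuming eager execution):

import tensorflow as tf

segment_embedding = tf.random.normal((2, 768))       # rows for segment 0 and 1 only
bad_segment_ids = tf.constant([[101, 0, 0]])         # 101 is a token ID, not a segment ID
# Raises InvalidArgumentError: indices[0,0] = 101 is not in [0, 2) on CPU/TPU;
# a GPU embedding lookup may tolerate out-of-range indices silently.
tf.nn.embedding_lookup(segment_embedding, bad_segment_ids)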
I build and train the model with the following steps:
#@title Clear session.
from tensorflow.keras import backend as K
K.clear_session()
# @title Preparation
!pip3 install -q keras-bert
!wget -q https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip -o uncased_L-12_H-768_A-12.zip
# @title Environment
import os

pretrained_path = 'uncased_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')
# Use TF_Keras
os.environ["TF_KERAS"] = "1"
# @title Load Basic Model
import codecs
from keras_bert import load_trained_model_from_checkpoint
token_dict = {}
with codecs.open(vocab_path, 'r', 'utf8') as reader:
    for line in reader:
        token = line.strip()
        token_dict[token] = len(token_dict)
model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
#@title Model Summary
model.summary()
#@title Create tokenization stuff.
import numpy as np
from keras_bert import Tokenizer

tokenizer = Tokenizer(token_dict)

def tokenize(text, max_len):
    tokenizer.tokenize(text)
    return tokenizer.encode(first=text, max_len=max_len)

def tokenize_array(texts, max_len=512):
    indices = np.zeros((texts.shape[0], max_len))
    segments = np.zeros((texts.shape[0], max_len))
    for i in range(texts.shape[0]):
        tokens = tokenize(texts[i], max_len)
        indices[i] = tokens[0]
        segments[i] = tokens[1]
    #print(indices.shape)
    #print(segments.shape)
    return np.stack([segments, indices], axis=1)
#@Tokenizer test.
test = tokenize_array(np.array(["Does she sell seashells by the seashore","Yes","No"]))
#print(test)
print(test.shape)
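# Illustrative check (my addition, not part of the original pipeline): keras-bert's
# Tokenizer.encode(first=..., max_len=...) returns a (token_indices, segment_ids) pair,
# so in the stacked output above, axis 1 is ordered [segments, indices].
sample_indices, sample_segments = tokenize("Does she sell seashells by the seashore", 512)
print(max(sample_indices), max(sample_segments))  # token IDs can be large; segment IDs stay in {0, 1}
print(test[:, 0].max(), test[:, 1].max())         # test[:, 0] = segments, test[:, 1] = indices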
#@title One-hot encode answer starts
def one_hot_int(X, max_int):
    try:
        assert type(X) == np.ndarray, "Expected ndarray, got " + str(type(X))
        assert X.shape[0] > 0, "Inputted array is empty."
        one_hot = np.zeros((X.shape[0], max_int + 1))
        for i in range(0, X.shape[0]):
            access = X[i]
            one_hot[i][access] = 1
        return one_hot
    except AssertionError as a:
        raise ValueError(a)
    except Exception as e:
        print("Exception in one_hot_int: ", e)
#@ Tokenize inputs.
import pandas as pd

def X_Y_generator(dataset, batch_size=10):
    while True:
        try:
            dataset_batch = dataset.sample(n=batch_size)
            questions = dataset_batch["question"]
            context = dataset_batch["paragraph_context"]
            questions_tokenized = tokenize_array(questions.values)
            #print("questions tokenized", questions_tokenized.shape)
            context_tokenized = tokenize_array(context.values)
            #print("contexts tokenized", context_tokenized.shape)
            X = np.stack([questions_tokenized, context_tokenized], axis=1)
            max_int = pd.concat((trainingData["answer_start"], devData["answer_start"])).max()
            Y = one_hot_int(dataset_batch["answer_start"].values, max_int)
            yield (X, Y)
        except Exception as e:
            print("Unhandled exception in X_Y_generator: ", e)
            raise
train = X_Y_generator(trainingData)
print(train)
for i in train:
    #print(i)
    print(i[0].shape)
    print(i[1].shape)
    break
dev = X_Y_generator(devData)
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda, Add, Flatten, Dense, Reshape, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

input_layer = Input(shape=(2,2,512,))
print("input layer: ",input_layer.shape)
questions_input_layer = Lambda(lambda x: x[:,0])(input_layer)
context_input_layer = Lambda(lambda x: x[:,1])(input_layer)
print("questions input layer: ",questions_input_layer.shape)
print("context input layer: ",context_input_layer.shape)
questions_indices_layer = Lambda(lambda x: tf.cast(x[:,0],tf.float64))(questions_input_layer)
print("questions indices layer: ",questions_indices_layer.shape)
questions_segments_layer = Lambda(lambda x: tf.cast(x[:,1],tf.float64))(questions_input_layer)
print("questions segments layer: ",questions_segments_layer.shape)
context_indices_layer = Lambda(lambda x: tf.cast(x[:,0],tf.float64))(context_input_layer)
context_segments_layer = Lambda(lambda x: tf.cast(x[:,1],tf.float64))(context_input_layer)
questions_bert_layer = model([questions_indices_layer,questions_segments_layer])
print("Questions bert layer loaded.")
context_bert_layer = model([context_indices_layer,context_segments_layer])
print("Context bert layer loaded.")
questions_bert_layer.trainable = False
context_bert_layer.trainable = False
concat = Add()([questions_bert_layer,context_bert_layer])
bert_flatten = Flatten()(concat)
bert_dense = Dense(512,activation="softmax")(bert_flatten)
questions_reshape = Reshape((-1,1))(questions_input_layer)
context_reshape = Reshape((-1,1))(context_input_layer)
lstm_questions = LSTM(512)(questions_reshape)
lstm_context = LSTM(512)(context_reshape)
lstm_combined = Add()([lstm_questions,lstm_context])
lstm_output = Dense(512)(lstm_combined)
combined_outputs = Add()([bert_dense,lstm_output])
answers_network_output = Dense(3127,activation="softmax")(combined_outputs)
#answers_network = Model(inputs=[input_layer],outputs=[questions_bert_layer,context_bert_layer])
answers_network = Model(inputs=[input_layer],outputs=[answers_network_output])
answers_network.summary()
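# The compile call below references an f1 metric that is not shown in this snippet.
# The following is a common batch-wise Keras-backend F1 implementation, included only
# as an assumption about what f1 looks like so the example is self-contained.
from tensorflow.keras import backend as K

def f1(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    return 2 * (precision * recall) / (precision + recall + K.epsilon())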
answers_network.compile("adam","categorical_crossentropy",metrics=[f1])
answers_network_checkpoint = ModelCheckpoint('answers_network-non-rnn-best.h5', verbose=1, monitor='val_f1',save_best_only=True, mode='auto')
print(answers_network.metrics_names)
testTrainingData = {"question":["Does she sell seashells by the seashore"],"paragraph_context":["She sells seashells by the seashore"],"answer_text":["She sells seashells by the seashore"],"answer_start":[0]}
testTrainingDataSet = pd.DataFrame.from_dict(testTrainingData)
testTrainingDataSet
testTrainingX = testTrainingDataSet[["question","paragraph_context"]]
testTrainingY = testTrainingDataSet["answer_start"]
testTrainingY
answers_network.fit_generator(X_Y_generator(testTrainingDataSet,batch_size=1),steps_per_epoch=100,epochs=100)
How can I fix this error?