How do I make my keras-bert model work on a TPU?

Asked: 2019-12-06 23:30:34

Tags: python tensorflow keras

I am building a model based on keras-bert. The model runs fine on a GPU, but when I switch to TPU acceleration in Google Colab, I get the following error:

InvalidArgumentError: indices[0,0] = 101 is not in [0, 2)
   [[{{node model_1/Embedding-Segment/embedding_lookup}}]]
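
(For context: the TPU-conversion step itself is not shown in this question. Switching the Colab runtime to TPU and distributing a tf.keras model generally involves something like the sketch below, using the TF 2.x tf.distribute APIs; the exact code in my notebook may differ.)

# Sketch only: a standard Colab TPU setup with TF 2.x APIs (an assumption, not the exact notebook code)
import os
import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

with strategy.scope():
    pass  # the model below would be built and compiled inside this scope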

I build and train the model with the following steps:

#@title Clear session.
from tensorflow.keras import backend as K
K.clear_session()

# @title Preparation
!pip3 install -q keras-bert
!wget -q https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip -o uncased_L-12_H-768_A-12.zip

# @title Environment
import os

pretrained_path = 'uncased_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')
# Use the tf.keras backend for keras-bert
os.environ["TF_KERAS"] = "1"

# @title Load Basic Model
import codecs
from keras_bert import load_trained_model_from_checkpoint

token_dict = {}
with codecs.open(vocab_path, 'r', 'utf8') as reader:
    for line in reader:
        token = line.strip()
        token_dict[token] = len(token_dict)

model = load_trained_model_from_checkpoint(config_path, checkpoint_path)


#@title Model Summary
model.summary()

#@title Create tokenization stuff.
import numpy as np
from keras_bert import Tokenizer

tokenizer = Tokenizer(token_dict)

def tokenize(text, max_len):
  # encode() returns (token indices, segment ids), both padded to max_len
  return tokenizer.encode(first=text, max_len=max_len)

def tokenize_array(texts, max_len=512):
  indices = np.zeros((texts.shape[0], max_len))
  segments = np.zeros((texts.shape[0], max_len))
  for i in range(texts.shape[0]):
    tokens = tokenize(texts[i], max_len)
    indices[i] = tokens[0]
    segments[i] = tokens[1]
  # Note the stacking order: row 0 holds the segments, row 1 the token indices
  return np.stack([segments, indices], axis=1)

#@title Tokenizer test.
test = tokenize_array(np.array(["Does she sell seashells by the seashore", "Yes", "No"]))
# Expected to print (3, 2, 512): three examples, each a (segments, indices) pair of length 512
print(test.shape)

#@title One-hot encode answer starts
def one_hot_int(X,max_int):
  try:
    assert type(X) == np.ndarray, "Expected ndarray, got " + str(type(X))
    assert X.shape[0] > 0,"Inputted array is empty."
    one_hot = np.zeros((X.shape[0],max_int + 1))
    for i in range(0,X.shape[0]):
      access = X[i]
      one_hot[i][access] = 1
    return one_hot
  except AssertionError as a:
    raise ValueError(a)
  except Exception as e:
    print("Exception in one_hot_int: ",e)

#@title Tokenize inputs.
import pandas as pd

def X_Y_generator(dataset, batch_size=10):
  while True:
    try:
      dataset_batch = dataset.sample(n=batch_size)
      questions = dataset_batch["question"]
      context = dataset_batch["paragraph_context"]
      questions_tokenized = tokenize_array(questions.values)
      context_tokenized = tokenize_array(context.values)
      X = np.stack([questions_tokenized, context_tokenized], axis=1)
      # trainingData and devData are the training/dev DataFrames loaded earlier (not shown here)
      max_int = pd.concat((trainingData["answer_start"], devData["answer_start"])).max()
      Y = one_hot_int(dataset_batch["answer_start"].values, max_int)
      yield (X, Y)
    except Exception as e:
      print("Unhandled exception in X_Y_generator: ", e)
      raise
train = X_Y_generator(trainingData)
print(train)
for i in train:
  #print(i)
  print(i[0].shape)
  print(i[1].shape)
  break
dev = X_Y_generator(devData)

import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda, Add, Flatten, Dense, Reshape, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

input_layer = Input(shape=(2,2,512,))
print("input layer: ", input_layer.shape)
questions_input_layer = Lambda(lambda x: x[:,0])(input_layer)
context_input_layer = Lambda(lambda x: x[:,1])(input_layer)
print("questions input layer: ", questions_input_layer.shape)
print("context input layer: ", context_input_layer.shape)
questions_indices_layer = Lambda(lambda x: tf.cast(x[:,0],tf.float64))(questions_input_layer)
print("questions indices layer: ",questions_indices_layer.shape)
questions_segments_layer = Lambda(lambda x: tf.cast(x[:,1],tf.float64))(questions_input_layer)
print("questions segments layer: ",questions_segments_layer.shape)
context_indices_layer = Lambda(lambda x: tf.cast(x[:,0],tf.float64))(context_input_layer)
context_segments_layer = Lambda(lambda x: tf.cast(x[:,1],tf.float64))(context_input_layer)
questions_bert_layer = model([questions_indices_layer,questions_segments_layer])
print("Questions bert layer loaded.")
context_bert_layer = model([context_indices_layer,context_segments_layer])
print("Context bert layer loaded.")
questions_bert_layer.trainable = False
context_bert_layer.trainable = False
concat = Add()([questions_bert_layer,context_bert_layer])
bert_flatten = Flatten()(concat)
bert_dense = Dense(512,activation="softmax")(bert_flatten)
questions_reshape = Reshape((-1,1))(questions_input_layer)
context_reshape = Reshape((-1,1))(context_input_layer)
lstm_questions = LSTM(512)(questions_reshape)
lstm_context = LSTM(512)(context_reshape)
lstm_combined = Add()([lstm_questions,lstm_context])
lstm_output = Dense(512)(lstm_combined)
combined_outputs = Add()([bert_dense,lstm_output])
answers_network_output = Dense(3127,activation="softmax")(combined_outputs)
#answers_network = Model(inputs=[input_layer],outputs=[questions_bert_layer,context_bert_layer])
answers_network = Model(inputs=[input_layer],outputs=[answers_network_output])
answers_network.summary()

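(Note: f1 in the compile call below is a custom metric defined elsewhere in the notebook and not shown in this question. A minimal Keras-backend F1 along the following lines would satisfy the call; this is a hypothetical stand-in, not the actual implementation.)

from tensorflow.keras import backend as K

def f1(y_true, y_pred):
    # Hypothetical stand-in for the custom f1 metric referenced in compile()
    y_pred_bin = K.round(K.clip(y_pred, 0, 1))
    true_positives = K.sum(K.round(K.clip(y_true * y_pred_bin, 0, 1)))
    predicted_positives = K.sum(y_pred_bin)
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())
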
answers_network.compile("adam","categorical_crossentropy",metrics=[f1])

answers_network_checkpoint = ModelCheckpoint('answers_network-non-rnn-best.h5', verbose=1, monitor='val_f1',save_best_only=True, mode='auto') 

print(answers_network.metrics_names)

testTrainingData = {"question":["Does she sell seashells by the seashore"],"paragraph_context":["She sells seashells by the seashore"],"answer_text":["She sells seashells by the seashore"],"answer_start":[0]}
testTrainingDataSet = pd.DataFrame.from_dict(testTrainingData)
testTrainingDataSet

testTrainingX = testTrainingDataSet[["question","paragraph_context"]]
testTrainingY = testTrainingDataSet["answer_start"]
testTrainingY

answers_network.fit_generator(X_Y_generator(testTrainingDataSet,batch_size=1),steps_per_epoch=100,epochs=100)

How can I fix this error?

0 Answers:

No answers yet.