如何将本地的 BERT 模型转换为 TF Hub 的 BERT 模块(BERT_MODEL_HUB)?

时间:2020-09-24 09:36:19

标签: python-3.x nlp bert-language-model

我想把本地下载的 BERT 模型用作 TF Hub 的 BERT 模块(即下面代码中的 BERT_MODEL_HUB),该怎么做?我的环境是 Python 3.7 和 tensorflow-gpu 1.15。

import os
from os.path import exists, join, basename, splitext
import sys

def download_from_google_drive(file_id, file_name):
  # download a file from the Google Drive link
  #
  # Runs only under IPython/Jupyter: the `!` lines are shell escapes, and
  # `$name` interpolates the Python variable `name` into the shell command.
  #   file_id:   Google Drive file id, placed in the `id=` query parameter.
  #   file_name: path handed to curl's `-o` for the downloaded file.
  # Start from a clean cookie jar so a stale ./cookie cannot be reused.
  !rm -f ./cookie
  # First request: store cookies in ./cookie (-c), silently (-s), following
  # redirects (-L); the response body itself is discarded.
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=$file_id" > /dev/null
  # Print the last field ($NF) of every cookie-jar line containing "download";
  # IPython returns the command's output as a list of lines.
  confirm_text = !awk '/download/ {print $NF}' ./cookie
  # NOTE(review): raises IndexError when no cookie line matches -- presumably
  # when Drive issues no confirmation cookie; confirm before relying on this.
  confirm_text = confirm_text[0]
  # Second request: send the saved cookies (-b) together with the confirm
  # token and write the response to file_name.
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=$confirm_text&id=$file_id" -o $file_name
  
# download a pre-trained model
model_path = 'cased_bert_base'
if not exists(model_path):
  download_from_google_drive('1MOZUKppfX45BEh7nxQ5AvzK-8wIUITr8', '%s.zip' % model_path)
  zip_file = model_path+".zip"
  !unzip $zip_file
  sys.path.append(model_path) 
print('Done!')

-------------------- TO --------------------

# TF Hub handle of the BERT module loaded by create_tokenizer_from_hub_module.
# NOTE(review): the handle names an uncased model, while the local download
# earlier in this page is called 'cased_bert_base' -- confirm the casing
# matches before swapping one for the other.
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Build a FullTokenizer from the Hub module's tokenization info.

  Loads the module named by BERT_MODEL_HUB in a throwaway graph, evaluates
  its "tokenization_info" signature to obtain the vocab file and the
  lower-casing flag, and passes both to bert.tokenization.FullTokenizer.

  Returns:
    The bert.tokenization.FullTokenizer built from those two values.
  """
  # A dedicated graph keeps the module's ops out of the default graph.
  with tf.Graph().as_default():
    module = hub.Module(BERT_MODEL_HUB)
    info = module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as session:
      vocab_file, do_lower_case = session.run(fetches)

  # The fetched values are plain Python/NumPy objects, so the tokenizer can be
  # constructed after the graph and session have been left.
  tokenizer_args = dict(vocab_file=vocab_file, do_lower_case=do_lower_case)
  return bert.tokenization.FullTokenizer(**tokenizer_args)

# Module-level tokenizer built from the Hub module named by BERT_MODEL_HUB.
tokenizer = create_tokenizer_from_hub_module()

0 个答案:

没有答案