我想把本地的 BERT 模型转换成 TF-Hub 上那种 BERT 模块(也就是把下面第一段代码的用法换成 "TO" 之后第二段代码的用法),该怎么做?我的环境是 Python 3.7 和 tensorflow-gpu 1.15。
import os
import subprocess
import sys
import zipfile
from os.path import exists, join, basename, splitext
from urllib.parse import urlencode
def download_from_google_drive(file_id, file_name):
    """Download a file from a Google Drive link using ``curl``.

    Follows the same two-request flow as the original notebook cell: the
    first request stores Google's cookies in ``./cookie``, the confirmation
    token is read from that cookie file, and the second request sends the
    cookies (plus the token, when one was issued) and saves the response
    body to ``file_name``.

    Args:
        file_id: Google Drive file id (the ``id`` query parameter).
        file_name: Path the downloaded file is written to.

    Raises:
        subprocess.CalledProcessError: If a ``curl`` invocation exits with a
            non-zero status.
        FileNotFoundError: If the ``curl`` executable is not available.
    """
    cookie_path = "./cookie"
    base_url = "https://drive.google.com/uc"

    # Start from a clean cookie jar (equivalent of ``rm -f ./cookie``).
    try:
        os.remove(cookie_path)
    except FileNotFoundError:
        pass

    # First request: only the cookies matter, so the body is discarded.
    # Arguments are passed as a list so that no shell ever interprets
    # ``file_id`` or ``file_name``.
    first_query = urlencode({"export": "download", "id": file_id})
    subprocess.run(
        ["curl", "-c", cookie_path, "-s", "-L", f"{base_url}?{first_query}"],
        stdout=subprocess.DEVNULL,
        check=True,
    )

    # Equivalent of ``awk '/download/ {print $NF}' ./cookie`` followed by
    # taking the first hit: the last whitespace-separated field of the first
    # line that mentions "download".
    confirm_token = None
    try:
        with open(cookie_path, encoding="utf-8") as cookie_file:
            for line in cookie_file:
                fields = line.split()
                if "download" in line and fields:
                    confirm_token = fields[-1]
                    break
    except FileNotFoundError:
        # No cookie file was written; continue without a confirmation token.
        pass

    # The original indexed the awk output unconditionally and raised
    # IndexError when no token was present; here the ``confirm`` parameter
    # is simply left out in that case.
    second_query = {"export": "download"}
    if confirm_token is not None:
        second_query["confirm"] = confirm_token
    second_query["id"] = file_id

    subprocess.run(
        [
            "curl", "-L", "-b", cookie_path,
            f"{base_url}?{urlencode(second_query)}",
            "-o", str(file_name),
        ],
        check=True,
    )
# Download and unpack the pre-trained model unless it is already on disk.
# NOTE(review): the original indentation was lost; this assumes that only the
# download and extraction steps are guarded by the existence check — confirm.
model_path = 'cased_bert_base'
if not exists(model_path):
    zip_file = model_path + ".zip"
    download_from_google_drive('1MOZUKppfX45BEh7nxQ5AvzK-8wIUITr8', zip_file)
    # Extract into the current directory with the standard library instead of
    # the notebook-only ``!unzip $zip_file`` shell escape.
    with zipfile.ZipFile(zip_file) as archive:
        archive.extractall()
# Adds the model directory to the import search path.
# NOTE(review): presumably the archive ships Python modules — verify.
sys.path.append(model_path)
print('Done!')
-------------------- TO --------------------
# TF-Hub handle of the uncased BERT module that create_tokenizer_from_hub_module() loads.
# NOTE(review): the download step earlier in this file fetches 'cased_bert_base'
# while this handle is an uncased model — confirm which casing is intended.
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
def create_tokenizer_from_hub_module():
    """Build a ``FullTokenizer`` configured by the module at ``BERT_MODEL_HUB``.

    The Hub module is instantiated in its own graph, its
    ``tokenization_info`` signature is evaluated in a session to obtain the
    vocabulary file and the lower-casing flag, and both values are passed to
    ``bert.tokenization.FullTokenizer``.

    Returns:
        The ``bert.tokenization.FullTokenizer`` built from those two values.
    """
    graph = tf.Graph()
    with graph.as_default():
        module = hub.Module(BERT_MODEL_HUB)
        info = module(signature="tokenization_info", as_dict=True)
        fetches = [info["vocab_file"], info["do_lower_case"]]
        # The session is opened while ``graph`` is still the default graph,
        # so it evaluates the tensors created just above.
        with tf.Session() as session:
            vocab_path, lower_case = session.run(fetches)

    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_path,
        do_lower_case=lower_case,
    )
# Build the tokenizer once at module level; the call loads the Hub module and
# runs a TensorFlow session to read its tokenization settings.
tokenizer = create_tokenizer_from_hub_module()