I'm trying to deploy a machine learning model on ACI, but when deploying the model as a web service on Azure, the final stage fails with the error below. Any suggestions on how to fix it would be appreciated.
```
2020-08-10T11:01:28,103498848+00:00 - rsyslog/run
2020-08-10T11:01:28,109724839+00:00 - nginx/run
2020-08-10T11:01:28,111248110+00:00 - gunicorn/run
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2020-08-10T11:01:28,192849628+00:00 - iot-server/finish 1 0
2020-08-10T11:01:28,194532107+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 19.9.0
Listening at: http://127.0.0.1:31311 (18)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 42
SPARK_HOME not set. Skipping PySpark Initialization.
Exception in worker process
Traceback (most recent call last):
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/arbiter.py", line 583, in spawn_worker
worker.init_process()
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/workers/base.py", line 129, in init_process
self.load_wsgi()
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/workers/base.py", line 138, in load_wsgi
self.wsgi = self.app.wsgi()
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/app/base.py", line 67, in wsgi
self.callable = self.load()
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 52, in load
return self.load_wsgiapp()
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 41, in load_wsgiapp
return util.import_app(self.app_uri)
File "/azureml-envs/azureml_d31cd964833447a6573171273c4f1235/lib/python3.7/site-packages/gunicorn/util.py", line 350, in import_app
__import__(module)
File "/var/azureml-server/wsgi.py", line 1, in <module>
import create_app
File "/var/azureml-server/create_app.py", line 3, in <module>
from app import main
File "/var/azureml-server/app.py", line 31, in <module>
import main as user_main
ModuleNotFoundError: No module named 'main'
Worker exiting (pid: 42)
Shutting down: Master
Reason: Worker failed to boot.
2020-08-10T11:01:28,524618650+00:00 - gunicorn/finish 3 0
2020-08-10T11:01:28,526020716+00:00 - Exit code 3 is not normal. Killing image.
```
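For context, the deployment itself is done roughly along these lines (a minimal sketch, not my exact code — `score.py`, the environment name, the service name, and the ACI sizing are placeholders):

```
from azureml.core import Environment, Workspace
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice

ws = Workspace.from_config()
model = Model(ws, name='Docker_custom_KP_model', version=2)

# entry_script points at the scoring script shown below
inference_config = InferenceConfig(
    entry_script='score.py',                   # placeholder name
    environment=Environment.get(ws, 'myenv'),  # placeholder environment
)
deployment_config = AciWebservice.deploy_configuration(cpu_cores=2, memory_gb=8)

service = Model.deploy(ws, 'kp-webservice', [model], inference_config, deployment_config)
service.wait_for_deployment(show_output=True)
print(service.get_logs())
```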
Below is the scoring script being used. Please let me know if I need to import anything else. Everything looks fine to me, and I'm finding it hard to spot any problem.
```
import json
import os
import pickle
import re
from collections import OrderedDict

import pandas as pd
import scipy.spatial
from azureml.contrib.services.aml_request import AMLRequest, rawhttp
from azureml.contrib.services.aml_response import AMLResponse
from azureml.core.model import Model
from sentence_transformers import SentenceTransformer, models

def clean_text(text):
    # Normalize line endings, then collapse all whitespace runs to single spaces.
    text = re.sub('\r\n', '\n', text)
    text = re.sub('\r', '', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'\n', ' ', text)
    return text
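
# Example of the intended behavior (my assumption, based on the regexes above):
#   clean_text("foo\r\nbar   baz")  ->  "foo bar baz"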

def init():
    print("This is init()")
    global model, on_path, ifc_inv, ifc_adv, wb_lend, wb_adv, smart_lesson, ifc_inv_data_map, ifc_adv_data_map, wb_lend_data_map, wb_adv_data_map, smart_lesson_data_map
    on_path = Model.get_model_path(model_name='Docker_custom_KP_model', version=2)

    ## Read in data files
    # IFC data
    ifc_inv = pd.read_csv(os.path.join(on_path, 'Assets/data_file/ifc_data/ifc_inv.csv'), lineterminator='\n')
    ifc_adv = pd.read_csv(os.path.join(on_path, 'Assets/data_file/ifc_data/ifc_adv.csv'), lineterminator='\n')
    # WB data
    wb_lend = pd.read_csv(os.path.join(on_path, 'Assets/data_file/wb_data/wb_lend.csv'), lineterminator='\n')
    wb_adv = pd.read_csv(os.path.join(on_path, 'Assets/data_file/wb_data/wb_adv.csv'), lineterminator='\n')
    # Lesson data
    smart_lesson = pd.read_csv(os.path.join(on_path, 'Assets/data_file/lesson_data/smart_lesson.csv'), lineterminator='\n')

    ## Read in data maps
    ifc_inv_data_map = pd.read_csv(os.path.join(on_path, 'Assets/data_map/ifc_inv_data_map.csv'))
    ifc_adv_data_map = pd.read_csv(os.path.join(on_path, 'Assets/data_map/ifc_adv_data_map.csv'))
    wb_lend_data_map = pd.read_csv(os.path.join(on_path, 'Assets/data_map/wb_lend_data_map.csv'))
    wb_adv_data_map = pd.read_csv(os.path.join(on_path, 'Assets/data_map/wb_adv_data_map.csv'))
    smart_lesson_data_map = pd.read_csv(os.path.join(on_path, 'Assets/data_map/smart_lesson_data_map.csv'))

    ## Drop the leftover index column from each dataframe
    for df in (ifc_inv, ifc_inv_data_map, ifc_adv, ifc_adv_data_map,
               wb_lend, wb_lend_data_map, wb_adv, wb_adv_data_map,
               smart_lesson, smart_lesson_data_map):
        df.drop(['Unnamed: 0'], axis=1, inplace=True)

    ## Clean data  # TODO - revisit after UAT
    ifc_inv['Project_Description'] = ifc_inv['Project_Description'].map(clean_text)
    ifc_adv['Project_Description'] = ifc_adv['Project_Description'].map(clean_text)
    wb_lend['proj_abstract'] = wb_lend['proj_abstract'].map(clean_text)
    wb_adv['proj_abstract'] = wb_adv['proj_abstract'].map(clean_text)
    smart_lesson['Abstracts'] = smart_lesson['Abstracts'].map(clean_text)

    ## Read in models
    word_embedding_model = models.BERT(os.path.join(on_path, 'model_dir/'))
    # Apply mean pooling to get one fixed-size sentence vector
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                   pooling_mode_mean_tokens=True,
                                   pooling_mode_cls_token=False,
                                   pooling_mode_max_tokens=False)
    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

def text_to_embedding(model, in_text):
    # Encode the input text(s) into sentence embeddings (numpy array).
    query_embeddings = model.encode(in_text, show_progress_bar=True)
    return query_embeddings

@rawhttp
def run(request):
    print("This is run()")
    print("Request: [{0}]".format(request))
    if request.method == 'GET':
        # For this example, just return the URL for GETs.
        respBody = str.encode(request.full_path)
        return AMLResponse(respBody, 200)
    elif request.method == 'POST':
        payload = json.loads(request.get_data(False))
        in_text = [payload['input_query']]
        req_page = payload['request_page']
        n_row = int(payload['n_request'])

        # Pick the dataframe, data map, and corpus embeddings preloaded in init()
        if req_page == 'IFCInvestment':
            embedding_file = 'Assets/embeddings/embedding_ifc_inv.pkl'
            relevant_df, data_map_df = ifc_inv, ifc_inv_data_map
        elif req_page == 'IFCAdv':
            embedding_file = 'Assets/embeddings/embedding_ifc_adv.pkl'
            relevant_df, data_map_df = ifc_adv, ifc_adv_data_map
        elif req_page == 'IFCLesson':
            embedding_file = 'Assets/embeddings/embedding_smart_lesson.pkl'
            relevant_df, data_map_df = smart_lesson, smart_lesson_data_map
        elif req_page == 'WBLend':
            embedding_file = 'Assets/embeddings/embedding_wb_lend.pkl'
            relevant_df, data_map_df = wb_lend, wb_lend_data_map
        elif req_page == 'WBAdv':
            embedding_file = 'Assets/embeddings/embedding_wb_adv.pkl'
            relevant_df, data_map_df = wb_adv, wb_adv_data_map
        else:
            # Guard against unrecognized pages instead of hitting a NameError below
            return AMLResponse("unknown request_page", 400)

        with open(os.path.join(on_path, embedding_file), 'rb') as f:
            corpus_vecs = pickle.load(f)
        vec = text_to_embedding(model, in_text)

        # Rank every corpus entry by cosine distance to the query embedding
        distances = scipy.spatial.distance.cdist(vec, corpus_vecs, "cosine")[0]
        results = sorted(zip(range(len(distances)), distances), key=lambda x: x[1])
        result_index = [idx for idx, _ in results]
        map_index = data_map_df.iloc[result_index]
        proj_index = map_index['Index'].to_list()
        # Deduplicate project indices while preserving the ranking order
        unique_proj_index = list(OrderedDict.fromkeys(proj_index))
        relevant_df.fillna('', inplace=True)
        kp_dataframe = relevant_df.loc[unique_proj_index[0:n_row]]
        kp_result = kp_dataframe.to_dict(orient='records')
        resp = AMLResponse(json.dumps(kp_result), 200)
        resp.headers['Access-Control-Allow-Origin'] = '*'
        return resp
    else:
        return AMLResponse("bad request", 500)
```
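For completeness, once the deployment works I intend to call the service roughly like this (a sketch; the scoring URI placeholder would come from `service.scoring_uri`, and the payload keys match what `run()` parses):

```
import json
import requests

scoring_uri = 'http://<aci-host>/score'  # placeholder

payload = {
    'input_query': 'renewable energy projects',  # free-text query
    'request_page': 'IFCInvestment',             # one of the five pages handled in run()
    'n_request': 5,                              # number of rows to return
}
resp = requests.post(scoring_uri, data=json.dumps(payload),
                     headers={'Content-Type': 'application/json'})
print(resp.status_code)
print(resp.json())  # list of matching project records
```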