我试图从php调用python脚本。 该脚本可以从命令行以及php执行。 但是,当从php调用它时,我得到一个KeyError,当我从cmd执行它时,我得不到它。
PHP脚本:
$tweets = json_encode($tweets, JSON_UNESCAPED_UNICODE);
$tweetPath = storage_path()."/app/tempTweet.json";
$tweetOpen = fopen($tweetPath, 'w');
fwrite($tweetOpen, $tweets);
fclose($tweetOpen);
$cmd = "python ../app/sentiment_analysis.py ";
$output = shell_exec($cmd); //better results by using shell_exec
Python脚本:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os, json, nltk, re
from collections import Counter
import itertools
reload(sys)
sys.setdefaultencoding('utf-8')
IS_POSSIBLY_UNDETERMINED = True
CERTAINTY_RATE = 0.15
class Tweet():
tokens = [] # List of all the tokens
text = ''
def __init__(self, rawtweet):
self.tokens = []
self.text = ""
self.preprocess(rawtweet)
self.extract_features()
def preprocess(self, rawtweet):
try:
rawtweet = rawtweet.lower()
rawtweet = re.sub('\\n','', rawtweet) #gets rid of line breaks
rawtweet = re.sub('@\S*','AT_USER', rawtweet) #banalizes user references
rawtweet = re.sub('https?://\S*', 'URL ', rawtweet)
rawtweet = re.sub('www\S*', 'URL ', rawtweet) #banalizes links
# self.text = ' \u'.join(tweet.split('\\u')) # attempt to treat emojis
rawtweet = re.sub("[/@'\\$`,\-#%&;.:=[{}()$0.""]", '', rawtweet)
self.text = rawtweet
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
# print(exc_type, fname, exc_tb.tb_lineno)
def extract_features(self):
tokens = [word for word in nltk.word_tokenize(self.text.decode('utf-8'))]
n_grams = []
dict_features = {}
try:
for t in tokens:
n_grams.append(t)
for t in range(len(tokens)-1): # Consecutive words
n_grams.append('+'.join(sorted([tokens[t],tokens[t+1]]))) # Adds consecutive bigrams to n_grams
for t in range(len(tokens)-2): # Two ahead
n_grams.append('+'.join(sorted([tokens[t], tokens[t+2]])))
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
print(exc_type, fname, exc_tb.tb_lineno)
n_grams = []
self.tokens = n_grams
def __del__(self):
self.label = ''
self.tokens = []
self.text = ''
class Classifier():
global_dict = {}
features = {}
features_filename = ''
classifier_filename = ''
def __init__(self, **keyword_parameters):
self.import_global_dict()
# Imports the previous information, or creates blank files and variables
def import_global_dict(self):
self.features_filename = FEATURES_FILE
self.classifier_filename = CLASSIFIER_FILE
# Classifier file
if not os.path.isfile(self.classifier_filename):
f = open(self.classifier_filename, 'w').close()
with open(self.classifier_filename, 'r') as f:
p = f.read()
if f:
try:
self.global_dict = Counter(json.loads(p))
except Exception as e:
self.global_dict = Counter(dict())
f.close()
# Insights file
if not os.path.isfile(self.features_filename):
f = open(self.features_filename, 'w').close()
with open(self.features_filename, 'r') as f:
p = f.read()
if f:
try:
self.features = json.loads(p)
except:
self.features = dict()
f.close()
def make_labels(self, tweets):
self.global_dict = dict(self.global_dict)
for k in tweets:
t = Tweet(tweets[k]['content'])
if len(t.tokens):
output = self.label_prevision_for_tweet(t.tokens)
if output:
# print output
label = output['label']
ratio = output['ratio']
tweets[k]['sentiment'] = {'label' : label, 'certainty' : ratio}
return tweets
def label_prevision_for_tweet(self, tokens):
try:
case_positive = self.features['p(+)']
case_negative = self.features['p(-)']
prob_null_pos = 1000000*(1/ float((self.features['positive_tokens'] + self.features['total_tokens'])))
prob_null_neg = 1000000*(1/ float((self.features['negative_tokens'] + self.features['total_tokens'])))
tokens_dict = {} # Local dict to store the tweet's tokens
for t in tokens:
try: #If tokens exist in global_dict
tokens_dict[t] = self.global_dict[t]
case_positive *= 1000000*tokens_dict[t]['p(+)']
case_negative *= 1000000*tokens_dict[t]['p(-)']
except Exception as e: # Consider existence in dict as 0
case_positive *= prob_null_pos
case_negative *= prob_null_neg
result = case_positive - case_negative
# print result, prob_null_pos, prob_null_neg, case_negative, case_positive
if result >= 0:
label = 'positive'
elif result < 0:
label = 'negative'
res_max = max(case_positive, case_negative)
res_min = min(case_positive, case_negative)
r = 1- res_min/float(res_max)
ratio = '{:.2%}'.format(r)
if (IS_POSSIBLY_UNDETERMINED and (r < CERTAINTY_RATE)):
label = 'undetermined'
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
print(exc_type, fname, exc_tb.tb_lineno, sys.exc_info())
label = 'undetermined'
ratio = 0
results = {'label': label,'ratio': ratio}
return results
if __name__ == '__main__':
CLASSIFIER_FILE = 'classifier_global.json'
FEATURES_FILE = 'features_global.json'
TWEET_FILE = '../storage/app/tempTweet.json'
tweets_file = unicode(TWEET_FILE, 'utf-8')
with open(tweets_file) as f:
tweets = json.load(f)
# CLASSIFIER_FILE = str(sys.argv[2])
# FEATURES_FILE = str(sys.argv[3])
d = Classifier()
# print type(tweets)
# print tweets
labelled_tweets = d.make_labels(tweets)
print labelled_tweets
KeyError在casepositive上的label_prevision_for_tweet中发送。 我在php中获得的返回值是KeyError(&#39; p(+)&#39;,)
答案 0 :(得分:1)
下面:
try:
self.features = json.loads(p)
except:
self.features = dict()
如果您因任何原因未能json.loads()
您的文件内容(可能为空,请参阅本部分上方的行),您默默地初始化{{1作为一个空的字典。在这种情况下,不要轻易得到KeyError。
要做的第一件事就是明确地将绝对文件路径传递给Python脚本。然后,如果找不到文件或者他们不包含有效的json,立即提出一个异常信号来表示问题,而不是试图假装一切都没问题。
另外,你的代码似乎很乱。哦,您可能想要学习并使用Python的标准self.features
包,它允许您以更简单的方式记录异常 - 或者只是让异常传播FWIW,它绝对是更简单的方法两者都要确保您的代码不会尝试在意外情况下工作和准确诊断出错的地方以及问题发生的位置(至少还要尝试打印异常,您仍然可以获得奖励积分。即使艰难的方式和错误的输出)。