可以从命令行执行Python脚本,但从PHP调用它时出错

时间:2017-01-05 14:16:52

标签: php python

我试图从PHP调用一个Python脚本。该脚本从命令行和从PHP都能运行。但是,从PHP调用它时我会得到一个KeyError,而从命令行(cmd)执行时则不会出现这个错误。

PHP脚本:

// Serialise the tweets (non-ASCII characters stay unescaped) and hand them to
// the Python script through a temporary JSON file in the storage directory.
$tweets = json_encode($tweets, JSON_UNESCAPED_UNICODE);
$tweetPath = storage_path() . '/app/tempTweet.json';
file_put_contents($tweetPath, $tweets);

// Run the analyser; $output receives whatever the script writes to stdout.
$cmd = "python ../app/sentiment_analysis.py ";
$output = shell_exec($cmd); //better results by using shell_exec

Python脚本:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os, json, nltk, re
from collections import Counter
import itertools
# Python 2 only: reload sys to get back setdefaultencoding (removed from the
# module at interpreter start-up) and make implicit str<->unicode conversions
# use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')


# When True, a prediction whose certainty is below CERTAINTY_RATE is reported
# as 'undetermined' (read by Classifier.label_prevision_for_tweet).
IS_POSSIBLY_UNDETERMINED = True
# Minimum certainty, computed as 1 - smaller_score / larger_score, that a
# prediction needs in order to keep its positive/negative label.
CERTAINTY_RATE = 0.15


class Tweet(object):
    """A tweet reduced to normalised text and a list of n-gram tokens.

    Attributes:
        text: The cleaned, lower-cased tweet text ('' if cleaning failed).
        tokens: Unigrams followed by order-insensitive bigrams of ``text``.
    """

    # Class-level defaults only; __init__ rebinds both on every instance.
    tokens = []
    text = ''

    def __init__(self, rawtweet):
        """Clean ``rawtweet`` and derive its n-gram tokens.

        Args:
            rawtweet: The raw tweet text.
        """
        self.tokens = []
        self.text = ""
        self.preprocess(rawtweet)
        self.extract_features()

    def preprocess(self, rawtweet):
        """Normalise ``rawtweet`` and store the result in ``self.text``.

        The text is lower-cased, line breaks are removed, user mentions
        become ``AT_USER``, links become ``URL `` and a fixed set of
        punctuation characters is stripped.

        Args:
            rawtweet: The raw tweet text. Anything that is not text is
                ignored and ``self.text`` is left unchanged (best effort).
        """
        try:
            cleaned = rawtweet.lower()
            cleaned = re.sub(r'\n', '', cleaned)  # drop line breaks
            cleaned = re.sub(r'@\S*', 'AT_USER', cleaned)  # anonymise mentions
            cleaned = re.sub(r'https?://\S*', 'URL ', cleaned)  # anonymise links
            cleaned = re.sub(r'www\S*', 'URL ', cleaned)
            # NOTE(review): this class also strips the digit 0 and does not
            # strip the double quote; that looks accidental (adjacent string
            # literals in the original pattern) but is kept as-is -- confirm.
            cleaned = re.sub(r"[/@'\$`,\-#%&;.:=\[{}()$0.]", '', cleaned)
        except (AttributeError, TypeError):
            # Non-text input: keep whatever text was there before.
            return
        self.text = cleaned

    @staticmethod
    def _build_ngrams(words):
        """Return unigrams, adjacent bigrams and skip-one bigrams of ``words``.

        Each bigram is the two words sorted and joined with '+', so word order
        inside a pair does not matter.
        """
        n_grams = list(words)
        # Consecutive words.
        n_grams.extend('+'.join(sorted(pair)) for pair in zip(words, words[1:]))
        # Words two positions apart.
        n_grams.extend('+'.join(sorted(pair)) for pair in zip(words, words[2:]))
        return n_grams

    def extract_features(self):
        """Tokenise ``self.text`` with NLTK and store n-grams in ``self.tokens``."""
        text = self.text
        # Decode only real byte strings; text that is already unicode must not
        # go through an implicit re-encode with the default encoding.
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        words = list(nltk.word_tokenize(text))
        self.tokens = self._build_ngrams(words)

class Classifier(object):
    """Labels tweets as positive/negative/undetermined from stored statistics.

    State is read from two JSON files whose paths come from the module-level
    names ``CLASSIFIER_FILE`` and ``FEATURES_FILE``; both must be defined
    before an instance is created.

    Attributes:
        global_dict: Per-token statistics; each entry is expected to provide
            the keys 'p(+)' and 'p(-)'.
        features: Global statistics; expected keys are 'p(+)', 'p(-)',
            'positive_tokens', 'negative_tokens' and 'total_tokens'.
        features_filename: Path the features were read from.
        classifier_filename: Path the per-token statistics were read from.
    """

    # Class-level defaults only; import_global_dict() rebinds all of them.
    global_dict = {}
    features = {}
    features_filename = ''
    classifier_filename = ''

    def __init__(self, **keyword_parameters):
        """Load the stored state. Keyword arguments are accepted but unused."""
        self.import_global_dict()

    @staticmethod
    def _read_or_create(path):
        """Return the content of ``path``, creating an empty file if missing."""
        if not os.path.isfile(path):
            open(path, 'w').close()
        with open(path, 'r') as handle:
            return handle.read()

    def import_global_dict(self):
        """Import the previous information, or create blank files and variables.

        NOTE: a missing, empty or invalid file silently yields an empty
        mapping, so later lookups such as ``self.features['p(+)']`` raise
        KeyError (handled in label_prevision_for_tweet).
        """
        self.features_filename = FEATURES_FILE
        self.classifier_filename = CLASSIFIER_FILE

        # Classifier file
        raw = self._read_or_create(self.classifier_filename)
        try:
            self.global_dict = Counter(json.loads(raw))
        except (ValueError, TypeError):
            self.global_dict = Counter()

        # Insights file
        raw = self._read_or_create(self.features_filename)
        try:
            self.features = json.loads(raw)
        except ValueError:
            self.features = dict()

    def _sentiment_for_tokens(self, tokens):
        """Return the ``{'label', 'certainty'}`` entry for one tweet's tokens."""
        if not tokens:
            # Nothing to classify: same fallback values as the error path of
            # label_prevision_for_tweet, instead of an unbound/stale label.
            return {'label': 'undetermined', 'certainty': 0}
        outcome = self.label_prevision_for_tweet(tokens)
        return {'label': outcome['label'], 'certainty': outcome['ratio']}

    def make_labels(self, tweets):
        """Add a 'sentiment' entry to every tweet and return ``tweets``.

        Args:
            tweets: Mapping whose values are mappings with a 'content' key
                holding the raw tweet text. It is modified in place.

        Returns:
            The same ``tweets`` mapping, each value extended with
            ``'sentiment': {'label': ..., 'certainty': ...}``.
        """
        self.global_dict = dict(self.global_dict)
        for key in tweets:
            tweet = Tweet(tweets[key]['content'])
            tweets[key]['sentiment'] = self._sentiment_for_tokens(tweet.tokens)
        return tweets

    def label_prevision_for_tweet(self, tokens):
        """Predict the label for a list of tokens.

        Args:
            tokens: The n-gram tokens of one tweet.

        Returns:
            ``{'label': ..., 'ratio': ...}`` where label is 'positive',
            'negative' or 'undetermined' and ratio is the certainty formatted
            as a percentage string. On any error the label is 'undetermined'
            and ratio is the integer 0.
        """
        try:
            case_positive = self.features['p(+)']
            case_negative = self.features['p(-)']
            total_tokens = self.features['total_tokens']
            # Scores used for tokens that have no usable entry in global_dict.
            # NOTE(review): the 1000000 factor presumably guards against
            # floating-point underflow -- confirm.
            prob_null_pos = 1000000 * (
                1 / float(self.features['positive_tokens'] + total_tokens))
            prob_null_neg = 1000000 * (
                1 / float(self.features['negative_tokens'] + total_tokens))

            for token in tokens:
                # Read both probabilities before touching either score so a
                # malformed entry cannot update one score twice.
                try:
                    entry = self.global_dict[token]
                    factor_pos = 1000000 * entry['p(+)']
                    factor_neg = 1000000 * entry['p(-)']
                except (KeyError, TypeError):
                    factor_pos, factor_neg = prob_null_pos, prob_null_neg
                case_positive *= factor_pos
                case_negative *= factor_neg

            if case_positive - case_negative >= 0:
                label = 'positive'
            else:
                label = 'negative'

            res_max = max(case_positive, case_negative)
            res_min = min(case_positive, case_negative)
            r = 1 - res_min / float(res_max)
            ratio = '{:.2%}'.format(r)

            if r != r:
                # NaN (e.g. both scores overflowed to infinity): no verdict.
                label = 'undetermined'
                ratio = 0
            elif IS_POSSIBLY_UNDETERMINED and r < CERTAINTY_RATE:
                label = 'undetermined'

        except Exception:
            # Deliberate best effort: report the problem and fall back.
            # NOTE: this diagnostic goes to stdout, so a caller that captures
            # the script's output receives it as well.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno, sys.exc_info())
            label = 'undetermined'
            ratio = 0

        return {'label': label, 'ratio': ratio}


if __name__ == '__main__':
    # Module-level names read by Classifier.import_global_dict().
    # NOTE: every path below is relative, so it is resolved against the
    # working directory of the process that starts this script, not against
    # the script's own location. Files that are not found there are created
    # empty by the classifier.
    CLASSIFIER_FILE = 'classifier_global.json'
    FEATURES_FILE = 'features_global.json'
    TWEET_FILE = '../storage/app/tempTweet.json'

    # Tweets written by the caller as one JSON document.
    with open(TWEET_FILE) as f:
        tweets = json.load(f)

    d = Classifier()

    # The labelled tweets are printed on stdout for the caller to capture.
    labelled_tweets = d.make_labels(tweets)
    print(labelled_tweets)

KeyError是在label_prevision_for_tweet中给case_positive赋值(读取self.features['p(+)'])时抛出的。我在PHP中得到的返回值是KeyError('p(+)',)

1 个答案:

答案 0 :(得分:1)

下面:

        try:
            self.features = json.loads(p)
        except:
            self.features = dict()

如果您因任何原因未能用json.loads()解析文件内容(文件很可能是空的,请参阅这段代码上方的几行),您就会默默地把self.features初始化为一个空字典。在这种情况下,得到KeyError也就不足为奇了。

要做的第一件事就是把文件的绝对路径显式地传递给Python脚本。然后,如果找不到文件或者文件中不包含有效的JSON,应当立即抛出异常来报告问题,而不是试图假装一切都没问题。

另外,你的代码看起来相当混乱。哦,您可能还想学习并使用Python标准库的logging包,它可以让您以简单得多的方式记录异常——或者干脆让异常向上传播。顺便说一句(FWIW),这绝对是最简单的做法:既能确保代码不会在意外情况下继续运行,又能准确诊断出了什么错以及问题发生的位置(不过您至少尝试了把异常打印出来,这一点仍然值得加分,即使用的是费力的办法,而且输出到了错误的地方)。