Question

我正在尝试运行存储库：Sherlock

代码在这里：

__author__ = "Armin"

import csv
import svm
from svmutil import *
import pickle
import re

learningSet = csv.reader(open("train.csv"))

# we have 5 class so, 5 classifier (opn, agr, ext, neu, con)
Features_con = []
Features_opn = []
Features_agr = []
Features_ext = []
Features_neu = []

users = []
allFeatures = []

pattern = re.compile(r"(.)\1{1,}", re.DOTALL)

def preProcess(status):
    re.sub('((www\.[^\s]+)|(https?://[^\s]+))', '__LINK__', status)
    re.sub(r'\d+', '', status)
    return status

def FeaturesVector(status):
    words = set(status)
    features = {}
    for word in allFeatures:
        features[word] = (word in status)
    return features

def getFeatures(status):
    fv = []
    bagsOfWord = status.split()
    for word in bagsOfWord:
        word = word.strip('...?!')
        word = pattern.sub(r"\1\1\1", word)
        fv.append(word)
    return fv

def save_classifier(classifier, name):
    f = open(name+'.pickle', 'wb')
    pickle.dump(classifier, f)
    f.close()

def load_classifier(name):
    f = open(name + '.pickle', 'rb')
    classifier = pickle.load(f)
    f.close()
    return classifier

def makeDict(features, featureList):
    sortedFeatures = sorted(featureList)
    feature_vector = []
    labels = []
    for t in features:
        label = 0
        Map = {}
        for w in sortedFeatures:
            Map[w] = 0

        words = t[0]
        c = t[1]
        for word in words:
            if word in Map:
                Map[word] = 1

        values = Map.values()
        feature_vector.append(values)
        if(c == 'y'):
            label = 1
        elif(c == 'n'):
            label = 0
        labels.append(label)
    return {'feature_vector' : feature_vector, 'labels': labels}

def makeDictTest(features, featuresList):
    sortedFeatures = sorted(featuresList)
    feature_vector = []
    for t in features:
        Map = {}
        for w in sortedFeatures:
            Map[w] = 0

        words = t
        for word in words:
            if word in Map:
                Map[word] = 1

        values = Map.values()
        feature_vector.append(values)

    return feature_vector


def svm(features, allFeatures):
    res = makeDict(features, allFeatures)
    problem = svm_problem(res['labels'], res['feature_vector'])
    param = svm_parameter('-q')
    param.kernel_type = LINEAR
    classifier = svm_train(problem, param)
    return classifier

def yesOrNo(resList):
    counter = 0;
    for i in resList:
        if i == 0.0:
            counter += 1
    if(counter > len(resList)/2):
        return 'n'
    return 'y'

again = True
#mess = input("Do you want to load classifier? (y/n) ")
#if(mess == 'n'):
    #again = True

if(again):
    # extracting ans saving features
    for line in learningSet:
        userId = line[0]
        status = line[1]
        ext = line[7]
        neu = line[8]
        agr = line[9]
        con = line[10]
        opn = line[11]
        # date
        date = line[12]

        # network features
        networkSize = line[13]
        nBetweenness = line[15]
        density = line[16]
        nBrokerage = line[18]
        transitivity = line[19]
        if userId not in users:
            users.append(userId)
            # network size
            Features_con.append((networkSize, con))
            Features_ext.append((networkSize, ext))
            Features_agr.append((networkSize, agr))
            Features_neu.append((networkSize, neu))
            Features_opn.append((networkSize, opn))
            # normal betweenness
            Features_con.append((nBetweenness, con))
            Features_ext.append((nBetweenness, ext))
            Features_agr.append((nBetweenness, agr))
            Features_neu.append((nBetweenness, neu))
            Features_opn.append((nBetweenness, opn))
            # density
            Features_con.append((density, con))
            Features_ext.append((density, ext))
            Features_agr.append((density, agr))
            Features_neu.append((density, neu))
            Features_opn.append((density, opn))
            # normal brokerage
            Features_con.append((nBrokerage, con))
            Features_ext.append((nBrokerage, ext))
            Features_agr.append((nBrokerage, agr))
            Features_neu.append((nBrokerage, neu))
            Features_opn.append((nBrokerage, opn))
            # transitivity
            Features_con.append((transitivity, con))
            Features_ext.append((transitivity, ext))
            Features_agr.append((transitivity, agr))
            Features_neu.append((transitivity, neu))
            Features_opn.append((transitivity, opn))
            # add to all
            allFeatures.append(networkSize)
            allFeatures.append(nBetweenness)
            allFeatures.append(nBrokerage)
            allFeatures.append(transitivity)
            allFeatures.append(density)

        # date feature
        Features_con.append((date, con))
        Features_neu.append((date, neu))
        Features_agr.append((date, agr))
        Features_opn.append((date, opn))
        Features_ext.append((date, ext))
        allFeatures.append(date)

        # linguestic features
        status = preProcess(status)
        statusFeatures = getFeatures(status)
        allFeatures.extend(statusFeatures)
        Features_con.append((statusFeatures, con))
        Features_neu.append((statusFeatures, neu))
        Features_agr.append((statusFeatures, agr))
        Features_opn.append((statusFeatures, opn))
        Features_ext.append((statusFeatures, ext))
    allFeatures = list(set(allFeatures))


    print("Training classifiers...")
    # training classifiers
    SVM_CON = svm(Features_con, allFeatures)
    print("Done 1 from 5.")
    SVM_AGR = svm(Features_agr, allFeatures)
    print("Done 2 from 5.")
    SVM_NEU = svm(Features_neu, allFeatures)
    print("Done 3 from 5.")
    SVM_EXT = svm(Features_ext, allFeatures)
    print("Done 4 from 5.")
    SVM_OPN = svm(Features_opn, allFeatures)
    print("Done 5 from 5.")
    print("Training classifiers done.")

else:
    print("loading classifiers ...")



Done = False

while(not Done):
    testIn = input("Status: ")
    nB = input("Normal Betweenness: ")
    nBr = input("Normal Brokerage: ")
    size = input("Normal Network Size: ")
    tr = input("Normal Transitivity: ")
    den = input("Normal Density: ")
    dt = input("Date :")
    FV = getFeatures(testIn);
    FV.append(nBr)
    FV.append(nB)
    FV.append(size)
    FV.append(tr)
    FV.append(den)
    FV.append(dt)
    fv = makeDictTest(FV, allFeatures)
    con_label, con_acc, con_val = svm_predict([0] * len(fv),fv, SVM_CON)
    ext_label, ext_acc, ext_val =  svm_predict([0] * len(fv),fv, SVM_EXT)
    agr_label, agr_acc, agr_val = svm_predict([0] * len(fv),fv, SVM_AGR)
    neu_label, neu_acc, neu_val = svm_predict([0] * len(fv),fv, SVM_NEU)
    opn_label, opn_acc, opn_val = svm_predict([0] * len(fv),fv, SVM_OPN)
    print(con_label)
    print("Extraversion : " + str(yesOrNo(ext_label)))
    print("Neuroticism : " + str(yesOrNo(neu_label)))
    print("Agreeableness : " + str(yesOrNo(agr_label)))
    print("Conscientiousness : " + str(yesOrNo(con_label)))
    print("Openness : " + str(yesOrNo(opn_label)))
    mess = input("Do you want to countinue? (y/n) ")
    if mess == "n":
        Done = True

testingSet = csv.reader(open("test.csv"))

print("Evaluating ...")

tp_con = 0
tn_con = 0
fn_con = 0
fp_con = 0

tp_ext = 0
tn_ext = 0
fn_ext = 0
fp_ext = 0

tp_agr = 0
tn_agr = 0
fn_agr = 0
fp_agr = 0

tp_opn = 0
tn_opn = 0
fn_opn = 0
fp_opn = 0

tp_neu = 0
tn_neu = 0
fn_neu = 0
fp_neu = 0

for line in testingSet:
        testStatus = line[1]
        testExt = line[7]
        testNeu = line[8]
        testAgr = line[9]
        testCon = line[10]
        testOpn = line[11]
        testDate = line[12]
        testNetworkSize = line[13]
        testNBetweenness = line[15]
        testDensity = line[16]
        testNBrokerage = line[18]
        testTransitivity = line[19]
        FV = getFeatures(testStatus)
        FV.append(testNetworkSize)
        FV.append(testNBetweenness)
        FV.append(testDensity)
        FV.append(testNBrokerage)
        FV.append(testDate)
        FV.append(testTransitivity)

        classCon = NBC_con.classify(FeaturesVector(FV))
        classExt = NBC_ext.classify(FeaturesVector(FV))
        classAgr = NBC_agr.classify(FeaturesVector(FV))
        classNeu = NBC_neu.classify(FeaturesVector(FV))
        classOpn = NBC_opn.classify(FeaturesVector(FV))

        if classCon == testCon and testCon == 'y':
            tp_con += 1
        if classCon == testCon and testCon == 'n':
            tn_con += 1
        if classCon != testCon and testCon == 'y':
            fp_con += 1
        if classCon != testCon and testCon == 'n':
            fn_con += 1

        if classExt == testExt and testExt == 'y':
            tp_ext += 1
        if classExt == testExt and testExt == 'n':
            tn_ext += 1
        if classExt == testExt and testExt == 'y':
            fp_ext += 1
        if classExt == testExt and testExt == 'n':
            fn_ext += 1

        if classOpn == testOpn and testOpn == 'y':
            tp_opn += 1
        if classOpn == testOpn and testOpn == 'n':
            tn_opn += 1
        if classOpn == testOpn and testOpn == 'y':
            fp_opn += 1
        if classOpn == testOpn and testOpn == 'n':
            fn_opn += 1

        if classAgr == testAgr and testAgr == 'y':
            tp_agr += 1
        if classAgr == testAgr and testAgr == 'n':
            tn_agr += 1
        if classAgr == testAgr and testAgr == 'y':
            fp_agr += 1
        if classAgr == testAgr and testAgr == 'n':
            fn_agr += 1

        if classNeu == testNeu and testNeu == 'y':
            tp_neu += 1
        if classNeu == testNeu and testNeu == 'n':
            tn_neu += 1
        if classNeu == testNeu and testNeu == 'y':
            fp_neu += 1
        if classNeu == testNeu and testNeu == 'n':
            fn_neu += 1

def precision(tp, tn, fp, fn):
    return 0.5 * (tp/(tp+fp) + tn/(tn+fn))

def recall(tp, tn, fp, fn):
    return 0.5 * (tp/(tp+fn) + tn/(tn+fp))

def F1(tp, tn, fp, fn):
    pre = precision(tp, tn, fp, fn)
    re = recall(tp, tn, fp, fn)
    return 2 * (pre * re / (pre + re))


print ("Con Pre(avg) = " + str(precision(tp_con, tn_con, fp_con, fn_con)))
print ("Con Re(avg) = " + str(recall(tp_con, tn_con, fp_con, fn_con))    )
print ("Con F1(avg) = " + str(F1(tp_con, tn_con, fp_con, fn_con))        )
print ("Agr Pre(avg) = " + str(precision(tp_agr, tn_agr, fp_agr, fn_agr)))
print ("Agr Re(avg) = " + str(recall(tp_agr, tn_agr, fp_agr, fn_agr))    )
print ("Agr F1(avg) = " + str(F1(tp_agr, tn_agr, fp_agr, fn_agr))        )
print ("Ext Pre(avg) = " + str(precision(tp_ext, tn_ext, fp_ext, fn_ext)))
print ("Ext Re(avg) = " + str(recall(tp_ext, tn_ext, fp_ext, fn_ext))    )
print ("Ext F1(avg) = " + str(F1(tp_ext, tn_ext, fp_ext, fn_ext))        )
print ("Neu Pre(avg) = " + str(precision(tp_neu, tn_neu, fp_neu, fn_neu)))
print ("Neu Re(avg) = " + str(recall(tp_neu, tn_neu, fp_neu, fn_neu))    )
print ("Neu F1(avg) = " + str(F1(tp_neu, tn_neu, fp_neu, fn_neu))        )
print ("Opn Pre(avg) = " + str(precision(tp_opn, tn_opn, fp_opn, fn_opn)))
print ("Opn Re(avg) = " + str(recall(tp_opn, tn_opn, fp_opn, fn_opn))    )
print ("Opn F1(avg) = " + str(F1(tp_opn, tn_opn, fp_opn, fn_opn))        )

当我尝试从存储库运行示例时，我遇到了以下错误：
python SherLockWithSVM.py 训练分类......

追踪（最近一次呼叫最后一次）：

File "SherLockWithSVM.py", line 201, in <module>
SVM_CON = svm(Features_con, allFeatures)
File "SherLockWithSVM.py", line 101, in svm
problem = svm_problem(res['labels'], res['feature_vector'])
File "C:\Python35\lib\site-packages\svm.py", line 103, in __init__
tmp_xi, tmp_idx = gen_svm_nodearray(xi,isKernel=isKernel)
File "C:\Python35\lib\site-packages\svm.py", line 71, in gen_svm_nodearray
raise TypeError('xi should be a dictionary, list or tuple')
TypeError: xi should be a dictionary, list or tuple

请帮助我。我使用的是Python3.5

错误：TypeError：xi应该是使用python 3.5的字典，列表或元组

0 个答案: