python列出三个问题并给出错误信息

时间:2018-05-01 14:05:21

标签: python

这是我的python代码,它在构造函数调用中列出了三个问题'E1123:Unexpected关键字参数'n_folds',E1123:构造函数调用中的意外关键字参数'n',E1133:非可迭代值k_fold用于迭代上下文'

import math
import random
import sys
import warnings
from math import sqrt

import numpy as np
import scipy.spatial
import scipy.stats
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

warnings.simplefilter("error")

users = 6040
items = 3952

def readingFile(filename):
f = open(filename,"r")
data = []
for row in f:
    r = row.split(',')
    e = [int(r[0]), int(r[1]), int(r[2])]
    data.append(e)
return data

def similarity_user(data):
print "Hello User"
user_similarity_cosine = np.zeros((users,users))
user_similarity_jaccard = np.zeros((users,users))
user_similarity_pearson = np.zeros((users,users))
for user1 in range(users):
    print user1
    for user2 in range(users):
        if np.count_nonzero(data[user1]) and np.count_nonzero(data[user2]):
            user_similarity_cosine[user1][user2] = 1-scipy.spatial.distance.cosine(data[user1],data[user2])
            user_similarity_jaccard[user1][user2] = 1-scipy.spatial.distance.jaccard(data[user1],data[user2])
            try:
                if not math.isnan(scipy.stats.pearsonr(data[user1],data[user2])[0]):
                    user_similarity_pearson[user1][user2] = scipy.stats.pearsonr(data[user1],data[user2])[0]
                else:
                    user_similarity_pearson[user1][user2] = 0
            except:
                user_similarity_pearson[user1][user2] = 0


return user_similarity_cosine, user_similarity_jaccard, user_similarity_pearson

def modelSelection(data):
k_fold = KFold(n=len(data), n_folds=10)

Mat = np.zeros((users,items))
for e in data:
    Mat[e[0]-1][e[1]-1] = e[2]

sim_user_cosine, sim_user_jaccard, sim_user_pearson = similarity_user(Mat)


'''sim_user_cosine = np.zeros((users,users))
sim_user_jaccard = np.zeros((users,users))
sim_user_pearson = np.zeros((users,users))

f_sim = open("sim_user_based.txt", "r")
for row in f_sim:
    r = row.strip().split(',')
    sim_user_cosine[int(r[0])][int(r[1])] = float(r[2])
    sim_user_jaccard[int(r[0])][int(r[1])] = float(r[3])
    sim_user_pearson[int(r[0])][int(r[1])] = float(r[4])
f_sim.close()'''

rmse_cosine = []
rmse_jaccard = []
rmse_pearson = []

for train_indices, test_indices in k_fold:
    train = [data[i] for i in train_indices]
    test = [data[i] for i in test_indices]

    M = np.zeros((users,items))

    for e in train:
        M[e[0]-1][e[1]-1] = e[2]

    true_rate = []
    pred_rate_cosine = []
    pred_rate_jaccard = []
    pred_rate_pearson = []

    for e in test:
        user = e[0]
        item = e[1]
        true_rate.append(e[2])

        pred_cosine = 3.0
        pred_jaccard = 3.0
        pred_pearson = 3.0

        #user-based
        if np.count_nonzero(M[user-1]):
            sim_cosine = sim_user_cosine[user-1]
            sim_jaccard = sim_user_jaccard[user-1]
            sim_pearson = sim_user_pearson[user-1]
            ind = (M[:,item-1] > 0)
            #ind[user-1] = False
            normal_cosine = np.sum(np.absolute(sim_cosine[ind]))
            normal_jaccard = np.sum(np.absolute(sim_jaccard[ind]))
            normal_pearson = np.sum(np.absolute(sim_pearson[ind]))
            if normal_cosine > 0:
                pred_cosine = np.dot(sim_cosine,M[:,item-1])/normal_cosine

            if normal_jaccard > 0:
                pred_jaccard = np.dot(sim_jaccard,M[:,item-1])/normal_jaccard

            if normal_pearson > 0:
                pred_pearson = np.dot(sim_pearson,M[:,item-1])/normal_pearson

        if pred_cosine < 0:
            pred_cosine = 0

        if pred_cosine > 5:
            pred_cosine = 5

        if pred_jaccard < 0:
            pred_jaccard = 0

        if pred_jaccard > 5:
            pred_jaccard = 5

        if pred_pearson < 0:
            pred_pearson = 0

        if pred_pearson > 5:
            pred_pearson = 5

        print str(user) + "\t" + str(item) + "\t" + str(e[2]) + "\t" + str(pred_cosine) + "\t" + str(pred_jaccard) + "\t" + str(pred_pearson)
        pred_rate_cosine.append(pred_cosine)
        pred_rate_jaccard.append(pred_jaccard)
        pred_rate_pearson.append(pred_pearson)

    rmse_cosine.append(sqrt(mean_squared_error(true_rate, pred_rate_cosine)))
    rmse_jaccard.append(sqrt(mean_squared_error(true_rate, pred_rate_jaccard)))
    rmse_pearson.append(sqrt(mean_squared_error(true_rate, pred_rate_pearson)))

    print str(sqrt(mean_squared_error(true_rate, pred_rate_cosine))) + "\t" + str(sqrt(mean_squared_error(true_rate, pred_rate_jaccard))) + "\t" + str(sqrt(mean_squared_error(true_rate, pred_rate_pearson)))
    #raw_input()

#print sum(rms) / float(len(rms))
rmse_cosine = sum(rmse_cosine) / float(len(rmse_cosine))
rmse_pearson = sum(rmse_pearson) / float(len(rmse_pearson))
rmse_jaccard = sum(rmse_jaccard) / float(len(rmse_jaccard))

print str(rmse_cosine) + "\t" + str(rmse_jaccard) + "\t" + str(rmse_pearson)

f_rmse = open("results/rmse_user.txt","w")
f_rmse.write(str(rmse_cosine) + "\t" + str(rmse_jaccard) + "\t" + str(rmse_pearson) + "\n")

rmse = [rmse_cosine, rmse_jaccard, rmse_pearson]
req_sim = rmse.index(min(rmse))

print req_sim
f_rmse.write(str(req_sim))
f_rmse.close()

if req_sim == 0:
    sim_mat_user = sim_user_cosine

if req_sim == 1:
    sim_mat_user = sim_user_jaccard

if req_sim == 2:
    sim_mat_user = sim_user_pearson

#predictRating(Mat, sim_mat_user)
return Mat, sim_mat_user


def predictRating(recommend_data):

M, sim_user = modelSelection(recommend_data)

#f = open("toBeRated.csv","r")
f = open(sys.argv[2],"r")
toBeRated = {"user":[], "item":[]}
for row in f:
    r = row.split(',')  
    toBeRated["item"].append(int(r[1]))
    toBeRated["user"].append(int(r[0]))

f.close()

pred_rate = []

#fw = open('result1.csv','w')
fw_w = open('results/result1.csv','w')

l = len(toBeRated["user"])
for e in range(l):
    user = toBeRated["user"][e]
    item = toBeRated["item"][e]

    pred = 3.0

    #user-based
    if np.count_nonzero(M[user-1]):
        sim = sim_user[user-1]
        ind = (M[:,item-1] > 0)
        #ind[user-1] = False
        normal = np.sum(np.absolute(sim[ind]))
        if normal > 0:
            pred = np.dot(sim,M[:,item-1])/normal

    if pred < 0:
        pred = 0

    if pred > 5:
        pred = 5

    pred_rate.append(pred)
    print (str(user) + "," + str(item) + "," + str(pred))
    #fw.write(str(user) + "," + str(item) + "," + str(pred) + "\n")
    fw_w.write(str(pred) + "\n")

#fw.close()
fw_w.close()

#recommend_data = readingFile("ratings.csv")
recommend_data = readingFile(sys.argv[1])
#crossValidation(recommend_data)
predictRating(recommend_data)

然后它给出了这个错误代码

'pydevd.main()
  File "C:\Users\Morakinyo\.vscode\extensions\ms-python.python-2018.3.1\pythonFiles\experimental\ptvsd\ptvsd\pydevd\pydevd.py", line 1628,
in main
    globals = debugger.run(setup['file'], None, None, is_module)
  File "C:\Users\Morakinyo\.vscode\extensions\ms-python.python-2018.3.1\pythonFiles\experimental\ptvsd\ptvsd\pydevd\pydevd.py", line 1035,
in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "c:\Users\Morakinyo\Documents\recommend\Coll\code\userBased.py", line 227, in <module>
    recommend_data = readingFile(sys.argv[1])
IndexError: list index out of range'

请帮助我成为python编程lhanguage的新手

0 个答案:

没有答案