RuntimeWarning:double_scalars中遇到无效值

时间:2015-09-12 09:13:58

标签: python python-2.7 numpy floating-point

算法的步骤:

1-用P和Q初始化

2-算法的主要过程在第一个for(循环)开始时开始。

这是我为上述步骤编写的程序:

enter image description here

#!/usr/bin/python
import time

try:
    import numpy
    import csv 
except:
    print ("This implementation requires the numpy module.")
    exit(0)

###############################################################################

"""
@INPUT:
    R     : a matrix to be factorized, dimension N x M
    P     : an initial matrix of dimension N x K
    Q     : an initial matrix of dimension M x K
    K     : the number of latent features
    steps : the maximum number of steps to perform the optimisation
    alpha : the learning rate
    beta  : the regularization parameter
@OUTPUT:
    the final matrices P and Q
"""

def matrix_factorization(R, P, Q, K, steps=5, alpha=0.0002, beta=0.02):
    Q = Q.T
    for step in range(steps):
        print "step",step
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    #int (type(float(R[i][j])))
                    eij = float(R[i][j]) - numpy.dot(P[i,:],Q[:,j])
                    #print(numpy.dot(P[i,:],Q[:,j]))
                    try:
                        for k in range(K):

                        #print "P[i][k]",P[i][k]
                        #print "Q[k][j]",Q[k][j] 
                            P[i][k] = float(P[i][k]) + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
                            Q[k][j] = float(Q[k][j]) + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
                    except:
                        print "step",step
                        print "P[i][k]",P[i][k]
                        print "Q[k][j]",Q[k][j] 
        eR = numpy.dot(P,Q)
        e = 0
    return P, Q.T

###############################################################################
R = []
if __name__ == "__main__":
    with open('DS.csv') as f:
        reader = csv.reader(f)
        for row in reader:
            R.append(row)
        for j in range (6):
             R[j]= [numpy.float64(i) for i in R[j]]
    R = numpy.array(R)
    N = len(R)
    M = len(R[0])
    K = 2
    P = numpy.random.rand(N,K)
    Q = numpy.random.rand(M,K)

    nP, nQ = matrix_factorization(R, P, Q, K)
    nR = numpy.dot(nP, nQ.T)
    print (nR)

但是当我运行上述程序时,我面临以下错误:

>>> ================================ RESTART ================================
>>> 
step 0
step 1
step 2
step 3

Warning (from warnings module):
  File "C:\Python34\mf.py", line 42
    Q[k][j] = float(Q[k][j]) + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
RuntimeWarning: overflow encountered in double_scalars

Warning (from warnings module):
  File "C:\Python34\mf.py", line 41
    P[i][k] = float(P[i][k]) + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
RuntimeWarning: invalid value encountered in double_scalars

Warning (from warnings module):
  File "C:\Python34\mf.py", line 41
    P[i][k] = float(P[i][k]) + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
RuntimeWarning: overflow encountered in double_scalars

Warning (from warnings module):
  File "C:\Python34\mf.py", line 42
    Q[k][j] = float(Q[k][j]) + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
RuntimeWarning: invalid value encountered in double_scalars
step 4
[[ nan  nan  nan ...,  nan  nan  nan]
 [ nan  nan  nan ...,  nan  nan  nan]
 [ nan  nan  nan ...,  nan  nan  nan]
 ..., 
 [ nan  nan  nan ...,  nan  nan  nan]
 [ nan  nan  nan ...,  nan  nan  nan]
 [ nan  nan  nan ...,  nan  nan  nan]]
>>> 

0 个答案:

没有答案