算法的步骤:
1-用P和Q初始化
2-算法的主要过程从第一个 for 循环开始。
这是我为上述步骤编写的程序:
#!/usr/bin/python
import csv
import sys
import time

# Only numpy is an optional third-party dependency; csv, sys and time are
# part of the standard library, so they are imported outside the guard.
try:
    import numpy
except ImportError:
    print("This implementation requires the numpy module.")
    # Exit with a non-zero status so callers can tell the script failed.
    sys.exit(1)
###############################################################################
def matrix_factorization(R, P, Q, K, steps=5, alpha=0.0002, beta=0.02):
    """Factorize R into P and Q with element-wise gradient-descent updates.

    For every entry of R that is greater than zero, the prediction error
    ``eij = R[i][j] - dot(P[i, :], Q[j, :])`` is computed and the first K
    features of row i of P and of row j of Q are moved along the
    regularized gradient.  Entries that are not greater than zero are
    skipped.

    @INPUT:
        R     : a matrix to be factorized, dimension N x M
        P     : an initial matrix of dimension N x K (numpy array)
        Q     : an initial matrix of dimension M x K (numpy array)
        K     : the number of latent features
        steps : the maximum number of steps to perform the optimisation
        alpha : the learning rate
        beta  : the regularization parameter
    @OUTPUT:
        the final matrices P and Q (N x K and M x K)

    P and Q are updated in place: the returned arrays refer to the same
    data as the arguments.

    NOTE: nothing in this function bounds the size of the updates.  When
    they diverge the values overflow to inf/nan and numpy reports this
    with RuntimeWarning rather than an exception; this usually means that
    alpha is too large for the magnitude of the values in R.
    """
    # Work on the transposed view (K x M) so that column j holds the
    # features of item j; writes through the view also update the caller's Q.
    Q = Q.T
    for step in range(steps):
        print("step %d" % step)
        for i, row in enumerate(R):
            for j, entry in enumerate(row):
                rating = float(entry)
                if not rating > 0:
                    continue
                eij = rating - numpy.dot(P[i, :], Q[:, j])
                # P is updated first and the Q update deliberately uses the
                # already-updated P values, exactly as the per-k loop did.
                P[i, :K] = P[i, :K] + alpha * (2 * eij * Q[:K, j] - beta * P[i, :K])
                Q[:K, j] = Q[:K, j] + alpha * (2 * eij * P[i, :K] - beta * Q[:K, j])
    return P, Q.T
###############################################################################
def load_ratings(path):
    """Read the CSV file at *path* into a 2-D float64 numpy array.

    Every non-blank row is converted to floats (not only the first six),
    so the resulting matrix never contains leftover strings.  An empty
    file yields an empty array.  A cell that cannot be parsed as a float
    raises ValueError.
    """
    with open(path) as f:
        rows = [[float(cell) for cell in row] for row in csv.reader(f) if row]
    return numpy.array(rows, dtype=numpy.float64)


R = []
if __name__ == "__main__":
    R = load_ratings('DS.csv')
    if R.size == 0:
        # Without this guard the dimension lookup below fails with an
        # unhelpful error on an empty input file.
        raise SystemExit("DS.csv contains no ratings.")
    N, M = R.shape
    K = 2
    # Random starting point for the factors: N x K and M x K.
    P = numpy.random.rand(N, K)
    Q = numpy.random.rand(M, K)
    nP, nQ = matrix_factorization(R, P, Q, K)
    # Reconstruct the full N x M matrix from the learned factors.
    nR = numpy.dot(nP, nQ.T)
    print(nR)
但是当我运行上述程序时，出现了以下警告，并且最终结果全部为 nan：
>>> ================================ RESTART ================================
>>>
step 0
step 1
step 2
step 3
Warning (from warnings module):
File "C:\Python34\mf.py", line 42
Q[k][j] = float(Q[k][j]) + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
RuntimeWarning: overflow encountered in double_scalars
Warning (from warnings module):
File "C:\Python34\mf.py", line 41
P[i][k] = float(P[i][k]) + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
RuntimeWarning: invalid value encountered in double_scalars
Warning (from warnings module):
File "C:\Python34\mf.py", line 41
P[i][k] = float(P[i][k]) + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
RuntimeWarning: overflow encountered in double_scalars
Warning (from warnings module):
File "C:\Python34\mf.py", line 42
Q[k][j] = float(Q[k][j]) + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
RuntimeWarning: invalid value encountered in double_scalars
step 4
[[ nan nan nan ..., nan nan nan]
[ nan nan nan ..., nan nan nan]
[ nan nan nan ..., nan nan nan]
...,
[ nan nan nan ..., nan nan nan]
[ nan nan nan ..., nan nan nan]
[ nan nan nan ..., nan nan nan]]
>>>