我正在把下面这个用 Ruby 编写的 SVD 推荐系统(recommendation system)翻译成 Mathematica:
require 'linalg'
# Map of 1-based user IDs (column positions in `m`) to display names.
users = { 1 => "Ben", 2 => "Tom", 3 => "John", 4 => "Fred" }
# Ratings matrix: one row per season, one column per user.
m = Linalg::DMatrix[
#Ben, Tom, John, Fred
[5,5,0,5], # season 1
[5,0,3,4], # season 2
[3,4,0,3], # season 3
[0,0,5,3], # season 4
[5,4,4,5], # season 5
[5,4,5,5] # season 6
]
# Compute the SVD Decomposition
u, s, vt = m.singular_value_decomposition
# After this transpose, `vt` holds one row per user.
vt = vt.transpose
# Take the 2-rank approximation of the Matrix
# - Take first and second columns of u (6x2)
# - Take first and second columns of vt (4x2)
# - Take the first two singular values as the top-left 2x2 block of s
u2 = Linalg::DMatrix.join_columns [u.column(0), u.column(1)]
v2 = Linalg::DMatrix.join_columns [vt.column(0), vt.column(1)]
eig2 = Linalg::DMatrix.columns [s.column(0).to_a.flatten[0,2], s.column(1).to_a.flatten[0,2]]
# Here comes Bob, our new user (one rating per season, 0 = not rated)
bob = Linalg::DMatrix[[5,5,0,0,0,5]]
# Project Bob's ratings into the same 2-D space as the rows of v2
bobEmbed = bob * u2 * eig2.inverse
# Compute the cosine similarity between Bob and every other User in our 2-D space
user_sim, count = {}, 1
v2.rows.each { |x|
user_sim[count] = (bobEmbed.transpose.dot(x.transpose)) / (x.norm * bobEmbed.norm)
count += 1
}
# Remove all users who fall below the 0.90 cosine similarity cutoff and sort by similarity
similar_users = user_sim.delete_if {|k,sim| sim < 0.9 }.sort {|a,b| b[1] <=> a[1] }
# Each entry is [user_id, similarity]; the block variable is not called `u`
# so that it does not shadow the SVD matrix `u` above.
similar_users.each { |pair| printf "%s (ID: %d, Similarity: %0.3f) \n", users[pair[0]], pair[0], pair[1] }
# We'll use a simple strategy in this case:
# 1) Select the most similar user
# 2) Compare all items rated by this user against your own and select items that you have not yet rated
# 3) Return the ratings for items I have not yet seen, but the most similar user has rated
# User IDs are 1-based, matrix columns are 0-based, hence the -1.
similarUsersItems = m.column(similar_users[0][0]-1).transpose.to_a.flatten
myItems = bob.transpose.to_a.flatten
# Maps 1-based season number => rating given by the most similar user.
not_seen_yet = {}
myItems.each_index { |i|
not_seen_yet[i+1] = similarUsersItems[i] if myItems[i] == 0 and similarUsersItems[i] != 0
}
printf "\n %s recommends: \n", users[similar_users[0][0]]
not_seen_yet.sort {|a,b| b[1] <=> a[1] }.each { |item|
printf "\tSeason %d .. I gave it a rating of %d \n", item[0], item[1]
}
print "We've seen all the same seasons, bugger!" if not_seen_yet.size == 0
以下是相应的Mathematica代码:
(* Clear any earlier values of the symbols used by the definitions and test code below. *)
Clear[s, u, v, s2, u2, v2, m, n, testdata, trainingdata, user, user2d];
(* find1nn[trainingdata, user]: takes the SVD of Transpose[trainingdata], keeps the
   first two singular directions, projects user into that 2-D space and returns,
   wrapped in a list, the row of trainingdata whose 2-D embedding has the highest
   cosine similarity to it. *)
(* NOTE(review): every symbol assigned in the body (u, s, v, u2, s2, v2, user2d, m, n,
   closest, index, a, distance, closestuserratings) is a global; nothing is localized. *)
find1nn[trainingdata_, user_] := {
{u , s, v} = SingularValueDecomposition[Transpose[trainingdata]];
(* Reduce to 2 dimensions. *)
u2 = u[[All, {1, 2}]];
s2 = s[[{1, 2}, {1, 2}]];
v2 = v[[All, {1, 2}]];
(* Embed the query vector in the same 2-D space as the rows of v2. *)
user2d = user.u2.Inverse[s2];
{m, n} = Dimensions[v2];
(* closest tracks the best similarity seen so far, index the row that produced it. *)
closest = -1;
index = -1;
(* NOTE(review): the loop test a < m stops before the last row of v2, so that row is
   never compared. *)
For[a = 1, a < m, a++,
{distance = 1 - CosineDistance[v2[[a, {1, 2}]], user2d];,
If[distance > closest, {closest = distance, index = a}];}];
closestuserratings = trainingdata[[index]];
closestuserratings
}
(* rec[closest, userx]: intended to replace each entry of userx that equals 0. with
   the entry at the same position in the first row of closest. *)
(* NOTE(review): userx is a pattern variable, so inside the body it has already been
   replaced by the argument's value; the assignment userx[[b]] = ... therefore has no
   symbol to assign into. *)
rec[closest_, userx_] := {
d = Dimensions[closest];
(* d[[2]] is the number of columns of closest. *)
For[b = 1, b <= d[[2]], b++,
If[userx[[b]] == 0., userx[[b]] = closest[[1, b]]]
]
(* NOTE(review): there is no ; between the For[...] above and userx below, so the two
   adjacent expressions are read as a product. *)
userx
}
(* finalrec[td, user]: looks up the nearest neighbour of user in td with find1nn and
   passes it, together with user, to rec. *)
finalrec[td_, user_] := rec[find1nn[td, user], user]
(*Clear[s,u,v,s2,u2,v2,m,n,testdata,trainingdata,user,user2d]*)
(* Example ratings: one row per user, one column per season. *)
testdata = {{5., 5., 3., 0., 5., 5.}, {5., 0., 4., 1., 4., 4.}, {0.,
3., 0., 5., 4., 5.}, {5., 4., 3., 3., 5., 5.}};
(* Ratings of the new user for whom a recommendation is wanted. *)
bob = {5., 0., 4., 0., 4., 5.};
(*recommend[testdata,bob]*)
(* Evaluate the nearest-neighbour lookup on its own, then the full recommendation. *)
find1nn[testdata, bob]
finalrec[testdata, bob]
由于某种原因,对 user 的下标元素赋值在函数内部不起作用,而在函数外部却可以。可能是什么原因导致了这种情况?
答案 0 :(得分:3)
请查阅 Mathematica 文档中关于变量局部化(localization)的教程。问题出在你的 rec 函数里:在 Mathematica 中,通常不能直接修改传入函数的参数(如果函数带有某个 Hold 属性,使相应的参数在未求值的状态下传入,那是可以做到的,但这里并不是这种情况)。应当先把参数复制到一个局部变量中,再修改这个副本:
(* rec[closest, userxi]: returns a one-element list holding a copy of userxi in which
   every entry equal to 0. has been replaced by the entry at the same position in the
   first row of closest. *)
rec[closest_, userxi_] :=
 Block[{d, b, userx = userxi},
  (* userxi is a pattern variable and cannot be assigned to, so the Part
     assignments below operate on the local copy userx instead. *)
  {d = Dimensions[closest];
   (* d[[2]] is the number of columns of closest. *)
   For[b = 1, b <= d[[2]], b++,
    If[userx[[b]] == 0., userx[[b]] = closest[[1, b]]]];
   userx}]
答案 1 :(得分:1)
我没有去深究你想要实现什么,下面给出一份更符合 Mathematica 风格、但(我希望)功能等价并且可以运行的代码。
显式循环已经去掉,许多不必要的变量也被消除了。现在所有变量都是局部变量,因此不再需要使用 Clear[]。
(* find1nn[trainingdata, user]: returns, wrapped in a list, the row of trainingdata
   whose rank-2 SVD embedding has the highest cosine similarity to the embedding of
   user. trainingdata holds one row per known user; user is a ratings vector with one
   entry per column of trainingdata. *)
find1nn[trainingdata_, user_] :=
 Module[{u, s, v, v2, user2d, similarities},
  {u, s, v} = SingularValueDecomposition[Transpose[trainingdata]];
  (* Keep the first two singular directions; row k of v2 embeds row k of trainingdata. *)
  v2 = v[[All, {1, 2}]];
  user2d = user.u[[All, {1, 2}]].Inverse[s[[{1, 2}, {1, 2}]]];
  (* Compare against every row of v2, including the last one; stopping at m - 1
     would silently leave the last training user out of the search. *)
  similarities = (1 - CosineDistance[#, user2d]) & /@ v2;
  (* Ordering sorts ascending, so its last element indexes the most similar row. *)
  {trainingdata[[Ordering[similarities][[-1]]]]}];
(* rec[closest, userxi]: for each column position of closest, takes the entry of
   userxi at that position, substituting the entry from the first row of closest
   wherever the userxi entry is 0. *)
rec[closest_, userxi_] :=
  Map[
   Function[pos, userxi[[pos]] /. {0. -> closest[[1, pos]]}],
   Range[Dimensions[closest][[2]]]];
(* finalrec[td, user]: finds the nearest neighbour of user in td via find1nn and hands
   it, together with user, to rec. *)
finalrec[td_, user_] :=
  With[{nearest = find1nn[td, user]}, rec[nearest, user]];
我相信这段代码仍有进一步优化的空间。
答案 2 :(得分:1)
下面是我在 belisarius 的代码和 Sjoerd 的改进基础上所做的一次尝试。
(* find1nn[trainingdata, user]: returns, as a one-row list, the row of trainingdata
   whose rank-2 SVD embedding has the smallest cosine distance to the embedding of
   user. trainingdata holds one row per known user. *)
find1nn[trainingdata_, user_] :=
 Module[{u, s, v, user2d, distances},
  (* The second argument asks for only the two largest singular values/vectors. *)
  {u, s, v} = SingularValueDecomposition[trainingdata\[Transpose], 2];
  user2d = user . u . Inverse@s;
  (* Map over every row of v; dropping the last row with Most would exclude the
     last training user from the search. *)
  distances = # ~CosineDistance~ user2d & /@ v;
  (* Ordering[..., 1] yields a one-element index list, so Part returns a one-row list. *)
  trainingdata[[ distances ~Ordering~ 1 ]]
 ]
(* rec[closest, userxi]: walks userxi and the first row of closest in parallel and
   keeps each userxi entry unless it equals 0, in which case the entry from closest
   is used instead. *)
rec[closest_, userxi_] :=
 MapThread[
  Function[{own, neighbour}, If[own == 0, neighbour, own]],
  {userxi, closest[[1]]}]
答案 3 :(得分:0)
(* Clear any earlier values of the symbols used by the definition and test code below. *)
Clear[s, u, v, s2, u2, v2, m, n, testdata, trainingdata, user, user2d];
(* recommend[trainingdata, user]: finds the row of trainingdata whose rank-2 SVD
   embedding is most similar (by cosine) to that of user, then builds a new list that
   copies user but takes the neighbour's entry wherever user has 0. *)
(* NOTE(review): all symbols assigned in the body are globals; nothing is localized. *)
recommend[trainingdata_, user_] := {
{u , s, v} = SingularValueDecomposition[Transpose[trainingdata]];
(* Reduce to 2 dimensions. *)
u2 = u[[All, {1, 2}]];
s2 = s[[{1, 2}, {1, 2}]];
v2 = v[[All, {1, 2}]];
(* Embed the query vector in the same 2-D space as the rows of v2. *)
user2d = user.u2.Inverse[s2];
{m, n} = Dimensions[v2];
(* closest tracks the best similarity seen so far, index the row that produced it. *)
closest = -1;
index = -1;
(* NOTE(review): the loop test a < m stops before the last row of v2. *)
For[a = 1, a < m, a++,
{distance = 1 - CosineDistance[v2[[a, {1, 2}]], user2d];,
If[distance > closest, {closest = distance, index = a}];}];
closestuserratings = trainingdata[[index]];
d = Dimensions[closestuserratings];
(* Build the result in a fresh list rather than assigning into the argument user. *)
updateduser = Table[0, {i, 1, d[[1]]}];
For[b = 1, b <= d[[1]], b++,
If[user[[b]] == 0., updateduser[[b]] = closestuserratings[[b]],
updateduser[[b]] = user[[b]]]
]
(* NOTE(review): there is no ; after the For[...] above, so it is multiplied with
   updateduser below; For evaluates to Null, which is presumably where the Null
   factors in the printed result come from. *)
updateduser
}
(* Example ratings: one row per user, one column per season. *)
testdata = {{5., 5., 3., 0., 5., 5.}, {5., 0., 4., 1., 4., 4.}, {0.,
3., 0., 5., 4., 5.}, {5., 4., 3., 3., 5., 5.}};
(* Ratings of the new user for whom a recommendation is wanted. *)
bob = {5., 0., 4., 0., 4., 5.};
recommend[testdata, bob]
{{5. Null, 0. Null, 4. Null, 1. Null, 4. Null, 5. Null}}
现在可以运行了,但为什么结果里会出现这些 Null?