我想使用 Simon Funk 的算法构建推荐系统。我的想法是先在 Matlab 中离线构建模型，并对结果进行评估，以找到性能最佳的参数（例如特征维度）。
我有一个“用户 × 项目”的评分矩阵，其中评分为 0 表示该用户尚未对该项目评分。
到目前为止，我已尝试在 Matlab 中实现 Simon Funk 的算法（见下面的源代码），但它的表现确实很差（！）。我偶然发现，用 Matlab 的 svds 函数来填补缺失的评分，效果实际上比我实现的 Simon Funk 函数更好，也更快。
有人能告诉我这可能是什么原因造成的吗？或者指出我在 Matlab 函数中犯了什么愚蠢的错误？ :P
function ratings = simon_funk(original_ratings, dimensions)
% SIMON_FUNK  Fill in a rating matrix with Simon Funk's incremental SVD.
%   See: http://www.timelydevelopment.com/demos/NetflixPrize.aspx
%
%   ratings = simon_funk(original_ratings, dimensions)
%
%   original_ratings  respondents-by-items matrix; entries > 0 are observed
%                     ratings, everything else is treated as "not rated".
%   dimensions        number of latent features to train.
%
%   ratings           respondents-by-items matrix of predictions, computed as
%                     1 + sum over features of respondent_feature * item_feature.
%                     The predictions are NOT clamped to the rating scale.
%
%   Features are trained one at a time by stochastic gradient descent over
%   the observed ratings, visited in row-major order (respondent by
%   respondent, items ascending). The order matters because every update
%   immediately affects the following predictions.
%
%   Side effect: sets the globals max_features, init, item_features and
%   respondent_features, which simon_funk_predict_rating reads.

% Shared state read by simon_funk_predict_rating.
global max_features;
global init;
global item_features;
global respondent_features;
max_features = dimensions;
init = 0.1;

% Training schedule.
% NOTE(review): the referenced implementation, as far as I recall, trains with
% far more epochs per feature and a smaller learning rate; such a short
% schedule may explain poor accuracy -- confirm against the reference.
min_epochs = 2;
max_epochs = 16;
min_improvement = 0.001;
lrate = 0.01;   % learning rate
k = 0.015;      % regularisation weight

respondent_count = size(original_ratings, 1);
item_count = size(original_ratings, 2);

% Collect the observed ratings as rows [user_id, item_id, rating, cache].
% Working on the transpose makes find() enumerate in row-major order of the
% original matrix, i.e. the same order the nested r/i loops would produce,
% without growing the array one row at a time.
transposed = original_ratings.';
observed = transposed > 0;
[item_idx, respondent_idx] = find(observed);
values = transposed(observed);
rating_count = numel(values);
% Force full double columns so integer or sparse input cannot turn the whole
% table (including the cached predictions) into an integer/sparse array.
data = [respondent_idx(:), item_idx(:), full(double(values(:))), zeros(rating_count, 1)];

% Every feature starts at the same small constant.
item_features = init * ones(max_features, item_count);
respondent_features = init * ones(max_features, respondent_count);

% Train the features one after another.
rmse = 2.0;
for f = 1:max_features
    for e = 1:max_epochs
        sq = 0;
        rmse_last = rmse;
        for r = 1:rating_count
            respondent = data(r, 1);
            item = data(r, 2);
            rating = data(r, 3);
            % Predict with the cached earlier features, the current feature
            % and the default contribution of the remaining features.
            p = simon_funk_predict_rating(data(r, :), f, 1);
            err = rating - p;
            sq = sq + err * err;
            % Take both old values first so the two updates are symmetric.
            rf = respondent_features(f, respondent);
            mf = item_features(f, item);
            % Cross-train the features
            respondent_features(f, respondent) = rf + lrate * (err * mf - k * rf);
            item_features(f, item) = mf + lrate * (err * rf - k * mf);
        end
        rmse = sqrt(sq / rating_count);
        % Stop this feature once the per-epoch improvement becomes too small.
        if (e >= min_epochs) && ((rmse_last - rmse) < min_improvement)
            break;
        end
    end
    % Cache the prediction that includes features 1..f for the next feature.
    for r = 1:rating_count
        data(r, 4) = simon_funk_predict_rating(data(r, :), f, 0);
    end
end

% Build the complete rating matrix: baseline 1 plus one rank-1 term per
% feature, accumulated in feature order.
ratings = ones(respondent_count, item_count);
for f = 1:max_features
    ratings = ratings + respondent_features(f, :).' * item_features(f, :);
end
预测单个评分的函数：
function prediction = simon_funk_predict_rating(rating, f, bTrailing)
% SIMON_FUNK_PREDICT_RATING  Predict one rating while feature f is trained.
%
%   prediction = simon_funk_predict_rating(rating, f, bTrailing)
%
%   rating     row vector [respondent_id, item_id, rating, cache]. cache is
%              the prediction accumulated over the features before f;
%              0 means "nothing cached" and the baseline 1 is used instead.
%   f          1-based index of the feature whose contribution is added.
%   bTrailing  when 1, also add the default contribution init*init of every
%              feature after f and clamp the result to [1, 10];
%              otherwise the raw, unclamped sum is returned.
%
%   Reads the globals item_features, respondent_features, max_features and
%   init, which must have been set by the caller.
global item_features;
global respondent_features;
global max_features;
global init;
respondent = rating(1,1);
item = rating(1,2);
cache_value = rating(1,4);
% Start from the cached value of the earlier features, or from the baseline.
% The cache is stored unclamped, so it may legitimately be negative; only an
% exact 0 means "no cache".
prediction = 1;
if (cache_value ~= 0)
    prediction = cache_value;
end
% Contribution of the current feature.
prediction = prediction + (item_features(f,item) * respondent_features(f,respondent));
if (bTrailing == 1)
    % Features f+1..max_features are still at their initial value; with a
    % 1-based f there are (max_features - f) of them.
    prediction = prediction + (max_features - f) * (init * init);
    % Explicit comparisons (rather than min/max) so that NaN propagates.
    if (prediction > 10)
        prediction = 10;
    end
    if (prediction < 1)
        prediction = 1;
    end
end
非常感谢!