Simon Funk对比Matlab SVDS

时间:2014-06-04 10:04:51

标签: matlab recommendation-engine svd

我想使用 Simon Funk 的算法构建推荐系统。我的想法是先在 Matlab 中离线构建模型,并对结果做一些评估,以找到性能最佳的设置(例如特征维度)。

我有一个"用户 × 项目"的评分矩阵,其中评分 '0' 表示该用户没有给该项目评分。

到目前为止,我已尝试在 Matlab 中实现 Simon Funk 的算法(参见下面的源代码),但它的表现确实很差(!)。我偶然发现,用 Matlab 的 svds 函数也可以填补空缺的评分,而且它实际上比我实现的 Simon Funk 函数效果更好、速度更快。

有人能告诉我这可能是什么原因造成的吗?或者告诉我,我在 Matlab 函数中犯了什么愚蠢的错误? :P

function ratings = simon_funk(original_ratings, dimensions)
%SIMON_FUNK Construct a complete rating matrix from a partially filled one.
%   RATINGS = SIMON_FUNK(ORIGINAL_RATINGS, DIMENSIONS) trains DIMENSIONS
%   latent features, one feature at a time, by stochastic gradient descent
%   over the observed entries of ORIGINAL_RATINGS, and returns a dense
%   matrix of the same size with a predicted rating for every
%   (respondent, item) pair.
%
%   ORIGINAL_RATINGS  respondents-by-items matrix. Only entries > 0 are
%                     used for training; every other entry is treated as
%                     "not rated".
%   DIMENSIONS        number of features to train.
%
%   The feature matrices are shared with simon_funk_predict_rating through
%   the globals max_features, init, item_features and respondent_features;
%   this function overwrites all four on every call.
%
%   The returned predictions are NOT clamped to the rating scale.
%
%   See: http://www.timelydevelopment.com/demos/NetflixPrize.aspx

% State shared with simon_funk_predict_rating.
global max_features;
global init;
global item_features;
global respondent_features;

max_features = dimensions;
init = 0.1;

% Training hyper-parameters.
% NOTE(review): at most max_epochs passes are made per feature; with this
% learning rate that may leave the features under-trained. Compare against
% the settings of the referenced implementation -- TODO confirm.
min_epochs = 2;
max_epochs = 16;
min_improvement = 0.001;
lrate = 0.01;   % learning rate
k = 0.015;      % regularization strength

respondent_count = size(original_ratings, 1);
item_count = size(original_ratings, 2);

% Collect the observed ratings as rows [respondent, item, rating, cache].
% Working on the transpose makes the column-major order produced by FIND
% equal to "respondent by respondent, items ascending", i.e. the order in
% which a nested respondent/item loop would visit the entries. Building the
% list in one step avoids re-allocating it once per rating.
by_item = original_ratings.';
observed = by_item > 0;
[item_idx, respondent_idx] = find(observed);
values = by_item(observed);
% (:) forces column vectors even when the input is a single row or column;
% full(double(...)) keeps the list dense and double for any input class.
data = [respondent_idx(:), item_idx(:), full(double(values(:))), zeros(numel(values), 1)];
rating_count = size(data, 1);

% Every feature starts at the same small constant.
item_features = init * ones(max_features, item_count);
respondent_features = init * ones(max_features, respondent_count);

% Train the features one after another.
rmse = 2.0;
for f = 1:max_features
    for e = 1:max_epochs
        sq = 0;
        rmse_last = rmse;

        for r = 1:rating_count
            respondent = data(r, 1);
            item = data(r, 2);
            rating = data(r, 3);

            % Prediction from the cached earlier features, the current
            % feature and the default contribution of the untrained ones.
            p = simon_funk_predict_rating(data(r, :), f, 1);
            err = rating - p;
            sq = sq + err * err;

            % Read both old values first so that each update uses the
            % other side's value from before this step.
            rf = respondent_features(f, respondent);
            mf = item_features(f, item);

            % Cross-train the features.
            respondent_features(f, respondent) = rf + lrate * (err * mf - k * rf);
            item_features(f, item) = mf + lrate * (err * rf - k * mf);
        end

        rmse = sqrt(sq / rating_count);

        % Stop this feature once the epoch-to-epoch gain becomes too small.
        if (e >= min_epochs) && ((rmse_last - rmse) < min_improvement)
            break;
        end
    end

    % Cache the contribution of all features trained so far, so the next
    % feature only has to add its own term.
    for r = 1:rating_count
        data(r, 4) = simon_funk_predict_rating(data(r, :), f, 0);
    end
end

% Full prediction matrix: baseline 1 plus, for every (respondent, item)
% pair, the sum over all features of the two feature values' product.
ratings = 1 + respondent_features.' * item_features;

预测单个评分的函数:

function prediction = simon_funk_predict_rating(rating, f, bTrailing)
%SIMON_FUNK_PREDICT_RATING Predict one rating from the features trained so far.
%   PREDICTION = SIMON_FUNK_PREDICT_RATING(RATING, F, BTRAILING)
%
%   RATING     row of the form [respondent, item, rating, cache]; only the
%              respondent index, item index and cache value are read.
%   F          1-based index of the feature currently being trained.
%   BTRAILING  when equal to 1, the default contribution of the features
%              after F is added and the result is clamped to [1, 10].
%
%   Reads the globals item_features, respondent_features, max_features and
%   init, which must have been set by the caller.
%
%   NOTE(review): a cache value <= 0 is treated as "no cache". Because the
%   non-trailing result is not clamped, a cached prediction can in principle
%   be <= 0 and would then be replaced by the baseline -- confirm whether
%   that is intended.
global item_features;
global respondent_features;
global max_features;
global init;

respondent = rating(1, 1);
item = rating(1, 2);
cache_value = rating(1, 4);

% Start from the cached prediction of the earlier features when there is
% one, otherwise from the baseline of 1.
if cache_value > 0
    prediction = cache_value;
else
    prediction = 1;
end

% Contribution of the current feature.
prediction = prediction + item_features(f, item) * respondent_features(f, respondent);

if bTrailing == 1
    % F is 1-based, so exactly (max_features - f) features have not been
    % trained yet; each of them still contributes init * init.
    prediction = prediction + (max_features - f) * (init * init);
    if prediction > 10
        prediction = 10;
    end
    if prediction < 1
        prediction = 1;
    end
end

非常感谢!

0 个答案:

没有答案