Speeding up a stochastic gradient ascent implementation in Python

Asked: 2017-12-27 13:29:24

Tags: python recommendation-engine gradient-descent

I am implementing a time-aware recommender system that applies BPR (Bayesian Personalized Ranking), using stochastic gradient ascent to learn the model parameters. One iteration involves randomly sampling a quadruple (i.e. userID, positive_item, negative_item, epoch_index) n times, where n is the total number of positive feedback entries (i.e. the number of ratings given across all items). However, my implementation takes far too long to learn the parameters (it needs 100 such iterations), so I would like to know whether there is a way to improve my code and speed up the learning process.
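
For reference, the surrounding training loop is roughly the sketch below (simplified: sampleQuadruple and self.n_pos are placeholder names for illustration, not my actual code):

# rough sketch of one training iteration: n single-quadruple updates
def train(self, n_iterations=100):
    n = self.n_pos  # total number of positive feedback entries
    for it in range(n_iterations):
        for _ in range(n):
            # draw a user, one of their positively rated items,
            # a negative (unobserved) item, and the time bin index
            userID, pos_item, neg_item, epoch = self.sampleQuadruple()
            self.updateFactors(userID, pos_item, neg_item, epoch)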

Below is the code snippet that updates the parameters for a given quadruple (userID, positive_item, negative_item, epoch_index):

# updating the parameters for one sampled quadruple
# (numpy is assumed to be imported as np)
def updateFactors(self, userID, pos_item, neg_item, epoch):

    pos_feat = self.feat[pos_item].toarray()
    neg_feat = self.feat[neg_item].toarray()
    pos_feat = pos_feat[0] # a dense NumPy array of 4096 feature values
    neg_feat = neg_feat[0] # a dense NumPy array of 4096 feature values

    feat_i = [] # the features of the pos_item
    feat_j = [] # the features of the neg_item
    for i in range(4096):
        feat_i.append((i, pos_feat[i]))
        feat_j.append((i, neg_feat[i]))

    diff = []
    # sparse-style merge: build (index, value) pairs of x_i - x_j
    p_i = 0
    p_j = 0
    while p_i < len(pos_feat) and p_j < len(neg_feat):
        ind_i = feat_i[p_i][0]
        ind_j = feat_j[p_j][0]
        if ind_i < ind_j:
            diff.append((ind_i, feat_i[p_i][1]))
            p_i = p_i + 1
        elif ind_i > ind_j:
            diff.append((ind_j, - feat_j[p_j][1])) # negate: this index only appears in x_j
            p_j = p_j + 1
        else:
            diff.append((ind_i, feat_i[p_i][1] - feat_j[p_j][1]))
            p_i = p_i + 1
            p_j = p_j + 1
    while p_i < len(feat_i):
        diff.append(feat_i[p_i])
        p_i = p_i + 1 # advance the pointer, otherwise this loop never terminates
    while p_j < len(feat_j):
        diff.append((feat_j[p_j][0], - feat_j[p_j][1]))
        p_j = p_j + 1

    len_of_diff = len(diff) # len_of_diff = 4096
    # E * (x_i - x_j)
    theta_i = np.zeros(self.K2) # K2 = 20
    theta_i_per_bin = np.zeros(self.K2)

    for r in range(self.K2): # K2 = 20 
        for ind in range(len_of_diff):
            c = diff[ind][0]
            feat_val = diff[ind][1]

            theta_i[r] += self.E[r][c] * feat_val
            theta_i_per_bin[r] += self.E_t[epoch][r][c] * feat_val

    visual_score = 0
    for k in range(self.K2): # K2 = 20
        visual_score += self.theta_user[userID][k] * (theta_i[k] * self.J_t[epoch][k] + theta_i_per_bin[k])


    visual_bias = 0
    for ind in range(len_of_diff):
        c = diff[ind][0]
        visual_bias += (self.betta_cnn[c] * self.C_t[epoch][c] + self.betta_cnn_t[epoch][c]) * diff[ind][1] # epoch is one of 10 time bins


    # x_uij = prediction(user_id, pos_item_id, bin) - prediction(user_id, neg_item_id, bin);

    pos_i = self.itemIdToInt[pos_item]
    neg_i = self.itemIdToInt[neg_item]

    bi = self.b_i[pos_i]
    bj = self.b_i[neg_i]

    x_uij = bi - bj
    x_uij += np.inner(self.gamma_user[userID], self.gamma_item[pos_i]) - np.inner(self.gamma_user[userID], self.gamma_item[neg_i])
    x_uij += visual_score + visual_bias

    deri = 1/(1 + np.exp(x_uij))  # = sigmoid(-x_uij), the gradient weight in BPR


    self.b_i[pos_i] += 0.005 * (deri - 1 * bi)
    self.b_i[neg_i] += 0.005 * (-deri - 1 * bj)

    # updating latent factors
    for k in range(self.K): # K = 20
        uf = self.gamma_user[userID][k]
        _if = self.gamma_item[pos_i][k]
        _jf = self.gamma_item[neg_i][k]

        self.gamma_user[userID][k] += 0.005 * (deri * (_if - _jf) - 1 * uf)
        self.gamma_item[pos_i][k] += 0.005 * (deri * uf - 1 * _if)
        self.gamma_item[neg_i][k] += 0.005 * (-deri * uf - 1/10.0 * _jf)

    # updating visual factors
    for k2 in range(self.K2): # K2 = 20
        v_uf = self.theta_user[userID][k2]
        j_t = self.J_t[epoch][k2]

        for ind in range(len_of_diff): # len_of_diff = 4096
            c = diff[ind][0]
            common = deri * v_uf * diff[ind][1]

            self.E[k2][c] += 0.005 * (common * j_t)
            self.E_t[epoch][k2][c] += 0.005 * (common - 0.0001 * self.E_t[epoch][k2][c])

        self.theta_user[userID][k2] += 0.005 * (deri * (theta_i[k2] * j_t + theta_i_per_bin[k2]) - 1 * v_uf)
        self.J_t[epoch][k2] += 0.005 * (deri * theta_i[k2] * v_uf - 0.0001 * j_t)

    for ind in range(len_of_diff):
        c = diff[ind][0]
        c_tf = self.C_t[epoch][c]
        b_cnn = self.betta_cnn[c]
        common = 0.005 * deri * diff[ind][1]

        self.betta_cnn[c] += common * c_tf
        self.C_t[epoch][c] += common * b_cnn - 0.005 * 0.0001 * c_tf
        self.betta_cnn_t[epoch][c] += common - 0.005 * 0.0001 * self.betta_cnn_t[epoch][c]
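
One direction I have been considering is replacing the element-wise Python loops with NumPy vector operations, along the lines of the untested sketch below (it assumes the rows of self.feat are SciPy sparse vectors, self.E and self.E_t[epoch] are (K2, 4096) ndarrays, and self.betta_cnn, self.betta_cnn_t[epoch], self.C_t[epoch] are length-4096 ndarrays):

# vectorized sketch of the feature-dependent parts (untested idea)
diff = (self.feat[pos_item] - self.feat[neg_item]).toarray().ravel()  # x_i - x_j

theta_i = self.E @ diff                    # shape (K2,)
theta_i_per_bin = self.E_t[epoch] @ diff   # shape (K2,)

visual_score = np.dot(self.theta_user[userID],
                      theta_i * self.J_t[epoch] + theta_i_per_bin)
visual_bias = np.dot(self.betta_cnn * self.C_t[epoch]
                     + self.betta_cnn_t[epoch], diff)

# ... compute x_uij and deri exactly as before, then update in bulk,
# capturing the old parameter values first just as the loop version does
v_u = self.theta_user[userID].copy()
j_t = self.J_t[epoch].copy()
grad = deri * np.outer(v_u, diff)          # deri * v_uf * diff[c], shape (K2, 4096)
self.E += 0.005 * grad * j_t[:, None]
self.E_t[epoch] += 0.005 * (grad - 0.0001 * self.E_t[epoch])
self.theta_user[userID] += 0.005 * (deri * (theta_i * j_t + theta_i_per_bin) - v_u)
self.J_t[epoch] += 0.005 * (deri * theta_i * v_u - 0.0001 * j_t)

common = 0.005 * deri * diff               # length-4096 vector
c_t = self.C_t[epoch].copy()
b_cnn = self.betta_cnn.copy()
self.betta_cnn += common * c_t
self.C_t[epoch] += common * b_cnn - 0.005 * 0.0001 * c_t
self.betta_cnn_t[epoch] += common - 0.005 * 0.0001 * self.betta_cnn_t[epoch]

This would also avoid building the 4096-entry list of (index, value) pairs altogether, since both feature vectors are dense anyway. Would this kind of vectorization be the right way to speed up the learning, or is there a better approach?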

0 Answers:

No answers yet.